Training in progress, step 2200, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0b74ba6f9e29db97fc90e486f0c1783a62ecc7d2714333c2aaeb33374411e6fd
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:485b0650e69a29f4057c433b6ae8526874cce5964e0f430921d5e647c9c27726
 size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:03955121740f9159b88a9dcfce2198a72d7f105344e5b8eec34ff63d217f6a96
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1c839af19f6ebafaebdaeaec16b09e7bb2e0e3e560872fd9ab9c2de368bc40c7
 size 1064
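Each of the four files above is a Git LFS pointer rather than the binary itself: a three-line stub giving the LFS spec version, the sha256 of the tracked blob, and its size in bytes, so this commit simply repoints each file at the blob produced by the step-2200 save. As a minimal sketch (the repository id and base model below are placeholders, not named anywhere in this commit), the adapter weights behind last-checkpoint/adapter_model.safetensors could be fetched and attached to a base model like this:

```python
# Sketch only: fetch the repo (LFS pointers resolve to the real binaries) and load the adapter.
# "your-org/your-repo" and "your-base-model" are hypothetical placeholders.
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM
from peft import PeftModel

local_dir = snapshot_download(repo_id="your-org/your-repo")
base = AutoModelForCausalLM.from_pretrained("your-base-model")
model = PeftModel.from_pretrained(base, f"{local_dir}/last-checkpoint")  # reads adapter_model.safetensors
```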
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.11928685009479523,
   "best_model_checkpoint": "miner_id_24/checkpoint-1600",
-  "epoch": 2.
+  "epoch": 2.540415704387991,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 2200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14095,6 +14095,1414 @@
       "eval_samples_per_second": 8.986,
       "eval_steps_per_second": 4.621,
       "step": 2000
+    },
+    {
+      "epoch": 2.3106235565819864,
+      "grad_norm": 0.7799156904220581,
+      "learning_rate": 2.5505697306745257e-05,
+      "loss": 0.049,
+      "step": 2001
+    },
+    [... 198 further per-step log entries for steps 2002-2199 ...]
+    {
+      "epoch": 2.540415704387991,
+      "grad_norm": 0.33286795020103455,
+      "learning_rate": 1.1621147307431768e-05,
+      "loss": 0.028,
+      "step": 2200
+    },
+    {
+      "epoch": 2.540415704387991,
+      "eval_loss": 0.12362270057201385,
+      "eval_runtime": 3.8912,
+      "eval_samples_per_second": 8.995,
+      "eval_steps_per_second": 4.626,
+      "step": 2200
     }
   ],
   "logging_steps": 1,
@@ -14109,7 +15517,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -14118,12 +15526,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 2.0512956151863706e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
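The updated trainer_state.json is what ties the checkpoint together: global_step is now 2200, the per-step entries above were appended to the training log, and the early-stopping bookkeeping (patience counter at 3, should_training_stop set to true) marks this as the final checkpoint of the run. A minimal sketch of reading that state back, assuming the usual top-level keys the transformers Trainer writes (log_history is not shown by name in the hunks above):

```python
# Sketch only: inspect the trainer state saved with this checkpoint.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])                    # 2200, 2.540415704387991
print(state["best_metric"], state["best_model_checkpoint"])    # 0.1192..., miner_id_24/checkpoint-1600
evals = [e for e in state["log_history"] if "eval_loss" in e]  # evaluation entries (every eval_steps=200 steps)
print(evals[-1]["eval_loss"])                                  # 0.1236... at step 2200
```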