Training in progress, step 16800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 70430032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:645da37f6cd210f85d2242551eab2b0bca7e5eaf8481b2872e157cc73f706402
|
| 3 |
size 70430032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36136276
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deed1841a049ad3c182d56c8b8227ac637dfe7600e052b1420bfbf899420c736
|
| 3 |
size 36136276
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:527ef17eae41cc2bd89b2e679ca035c49be10827a880a747e26c0df9654c65aa
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e81d92203ea60730fa74715b79dc2ee46d4d4e366c269105391ad727a064972
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -116879,6 +116879,1414 @@
|
|
| 116879 |
"eval_samples_per_second": 33.753,
|
| 116880 |
"eval_steps_per_second": 11.271,
|
| 116881 |
"step": 16600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116882 |
}
|
| 116883 |
],
|
| 116884 |
"logging_steps": 1,
|
|
@@ -116907,7 +118315,7 @@
|
|
| 116907 |
"attributes": {}
|
| 116908 |
}
|
| 116909 |
},
|
| 116910 |
-
"total_flos": 2.
|
| 116911 |
"train_batch_size": 3,
|
| 116912 |
"trial_name": null,
|
| 116913 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.9967706203460693,
|
| 3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-16800",
|
| 4 |
+
"epoch": 1.8112500673818124,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 16800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 116879 |
"eval_samples_per_second": 33.753,
|
| 116880 |
"eval_steps_per_second": 11.271,
|
| 116881 |
"step": 16600
|
| 116882 |
+
},
|
| 116883 |
+
{
|
| 116884 |
+
"epoch": 1.7897956983451027,
|
| 116885 |
+
"grad_norm": 1.6048762798309326,
|
| 116886 |
+
"learning_rate": 7.023994141268762e-05,
|
| 116887 |
+
"loss": 2.1323,
|
| 116888 |
+
"step": 16601
|
| 116889 |
+
},
|
| 116890 |
+
{
|
| 116891 |
+
"epoch": 1.7899035092447848,
|
| 116892 |
+
"grad_norm": 1.6418133974075317,
|
| 116893 |
+
"learning_rate": 7.02291509981522e-05,
|
| 116894 |
+
"loss": 2.0456,
|
| 116895 |
+
"step": 16602
|
| 116896 |
+
},
|
| 116897 |
+
{
|
| 116898 |
+
"epoch": 1.7900113201444667,
|
| 116899 |
+
"grad_norm": 1.629949927330017,
|
| 116900 |
+
"learning_rate": 7.021836096394414e-05,
|
| 116901 |
+
"loss": 1.6071,
|
| 116902 |
+
"step": 16603
|
| 116903 |
+
},
|
| 116904 |
+
{
|
| 116905 |
+
"epoch": 1.7901191310441487,
|
| 116906 |
+
"grad_norm": 1.7133731842041016,
|
| 116907 |
+
"learning_rate": 7.020757131020136e-05,
|
| 116908 |
+
"loss": 1.9259,
|
| 116909 |
+
"step": 16604
|
| 116910 |
+
},
|
| 116911 |
+
{
|
| 116912 |
+
"epoch": 1.7902269419438306,
|
| 116913 |
+
"grad_norm": 1.7619162797927856,
|
| 116914 |
+
"learning_rate": 7.019678203706163e-05,
|
| 116915 |
+
"loss": 2.3188,
|
| 116916 |
+
"step": 16605
|
| 116917 |
+
},
|
| 116918 |
+
{
|
| 116919 |
+
"epoch": 1.7903347528435125,
|
| 116920 |
+
"grad_norm": 2.0171520709991455,
|
| 116921 |
+
"learning_rate": 7.018599314466282e-05,
|
| 116922 |
+
"loss": 1.4843,
|
| 116923 |
+
"step": 16606
|
| 116924 |
+
},
|
| 116925 |
+
{
|
| 116926 |
+
"epoch": 1.7904425637431944,
|
| 116927 |
+
"grad_norm": 1.739280343055725,
|
| 116928 |
+
"learning_rate": 7.017520463314277e-05,
|
| 116929 |
+
"loss": 2.1291,
|
| 116930 |
+
"step": 16607
|
| 116931 |
+
},
|
| 116932 |
+
{
|
| 116933 |
+
"epoch": 1.7905503746428764,
|
| 116934 |
+
"grad_norm": 1.8356544971466064,
|
| 116935 |
+
"learning_rate": 7.016441650263929e-05,
|
| 116936 |
+
"loss": 2.2262,
|
| 116937 |
+
"step": 16608
|
| 116938 |
+
},
|
| 116939 |
+
{
|
| 116940 |
+
"epoch": 1.7906581855425583,
|
| 116941 |
+
"grad_norm": 1.4841974973678589,
|
| 116942 |
+
"learning_rate": 7.015362875329021e-05,
|
| 116943 |
+
"loss": 1.9847,
|
| 116944 |
+
"step": 16609
|
| 116945 |
+
},
|
| 116946 |
+
{
|
| 116947 |
+
"epoch": 1.7907659964422402,
|
| 116948 |
+
"grad_norm": 1.6973233222961426,
|
| 116949 |
+
"learning_rate": 7.014284138523328e-05,
|
| 116950 |
+
"loss": 1.6568,
|
| 116951 |
+
"step": 16610
|
| 116952 |
+
},
|
| 116953 |
+
{
|
| 116954 |
+
"epoch": 1.7908738073419221,
|
| 116955 |
+
"grad_norm": 1.6693850755691528,
|
| 116956 |
+
"learning_rate": 7.01320543986064e-05,
|
| 116957 |
+
"loss": 1.971,
|
| 116958 |
+
"step": 16611
|
| 116959 |
+
},
|
| 116960 |
+
{
|
| 116961 |
+
"epoch": 1.790981618241604,
|
| 116962 |
+
"grad_norm": 1.9606497287750244,
|
| 116963 |
+
"learning_rate": 7.012126779354734e-05,
|
| 116964 |
+
"loss": 2.5799,
|
| 116965 |
+
"step": 16612
|
| 116966 |
+
},
|
| 116967 |
+
{
|
| 116968 |
+
"epoch": 1.7910894291412862,
|
| 116969 |
+
"grad_norm": 1.6667673587799072,
|
| 116970 |
+
"learning_rate": 7.011048157019387e-05,
|
| 116971 |
+
"loss": 1.9092,
|
| 116972 |
+
"step": 16613
|
| 116973 |
+
},
|
| 116974 |
+
{
|
| 116975 |
+
"epoch": 1.7911972400409681,
|
| 116976 |
+
"grad_norm": 1.8839538097381592,
|
| 116977 |
+
"learning_rate": 7.009969572868385e-05,
|
| 116978 |
+
"loss": 2.363,
|
| 116979 |
+
"step": 16614
|
| 116980 |
+
},
|
| 116981 |
+
{
|
| 116982 |
+
"epoch": 1.79130505094065,
|
| 116983 |
+
"grad_norm": 1.762976884841919,
|
| 116984 |
+
"learning_rate": 7.008891026915503e-05,
|
| 116985 |
+
"loss": 1.9364,
|
| 116986 |
+
"step": 16615
|
| 116987 |
+
},
|
| 116988 |
+
{
|
| 116989 |
+
"epoch": 1.7914128618403322,
|
| 116990 |
+
"grad_norm": 1.6582813262939453,
|
| 116991 |
+
"learning_rate": 7.007812519174517e-05,
|
| 116992 |
+
"loss": 1.8677,
|
| 116993 |
+
"step": 16616
|
| 116994 |
+
},
|
| 116995 |
+
{
|
| 116996 |
+
"epoch": 1.7915206727400141,
|
| 116997 |
+
"grad_norm": 1.3028643131256104,
|
| 116998 |
+
"learning_rate": 7.006734049659212e-05,
|
| 116999 |
+
"loss": 1.2821,
|
| 117000 |
+
"step": 16617
|
| 117001 |
+
},
|
| 117002 |
+
{
|
| 117003 |
+
"epoch": 1.791628483639696,
|
| 117004 |
+
"grad_norm": 1.6592934131622314,
|
| 117005 |
+
"learning_rate": 7.005655618383363e-05,
|
| 117006 |
+
"loss": 1.8353,
|
| 117007 |
+
"step": 16618
|
| 117008 |
+
},
|
| 117009 |
+
{
|
| 117010 |
+
"epoch": 1.791736294539378,
|
| 117011 |
+
"grad_norm": 1.5552796125411987,
|
| 117012 |
+
"learning_rate": 7.004577225360744e-05,
|
| 117013 |
+
"loss": 1.9086,
|
| 117014 |
+
"step": 16619
|
| 117015 |
+
},
|
| 117016 |
+
{
|
| 117017 |
+
"epoch": 1.79184410543906,
|
| 117018 |
+
"grad_norm": 1.6225606203079224,
|
| 117019 |
+
"learning_rate": 7.003498870605132e-05,
|
| 117020 |
+
"loss": 1.8369,
|
| 117021 |
+
"step": 16620
|
| 117022 |
+
},
|
| 117023 |
+
{
|
| 117024 |
+
"epoch": 1.7919519163387418,
|
| 117025 |
+
"grad_norm": 1.5022133588790894,
|
| 117026 |
+
"learning_rate": 7.002420554130307e-05,
|
| 117027 |
+
"loss": 1.6922,
|
| 117028 |
+
"step": 16621
|
| 117029 |
+
},
|
| 117030 |
+
{
|
| 117031 |
+
"epoch": 1.7920597272384238,
|
| 117032 |
+
"grad_norm": 1.7301616668701172,
|
| 117033 |
+
"learning_rate": 7.001342275950044e-05,
|
| 117034 |
+
"loss": 2.0619,
|
| 117035 |
+
"step": 16622
|
| 117036 |
+
},
|
| 117037 |
+
{
|
| 117038 |
+
"epoch": 1.7921675381381057,
|
| 117039 |
+
"grad_norm": 1.7533267736434937,
|
| 117040 |
+
"learning_rate": 7.000264036078112e-05,
|
| 117041 |
+
"loss": 1.8789,
|
| 117042 |
+
"step": 16623
|
| 117043 |
+
},
|
| 117044 |
+
{
|
| 117045 |
+
"epoch": 1.7922753490377876,
|
| 117046 |
+
"grad_norm": 1.4618946313858032,
|
| 117047 |
+
"learning_rate": 6.999185834528295e-05,
|
| 117048 |
+
"loss": 1.4497,
|
| 117049 |
+
"step": 16624
|
| 117050 |
+
},
|
| 117051 |
+
{
|
| 117052 |
+
"epoch": 1.7923831599374696,
|
| 117053 |
+
"grad_norm": 1.8028799295425415,
|
| 117054 |
+
"learning_rate": 6.998107671314362e-05,
|
| 117055 |
+
"loss": 2.6358,
|
| 117056 |
+
"step": 16625
|
| 117057 |
+
},
|
| 117058 |
+
{
|
| 117059 |
+
"epoch": 1.7924909708371515,
|
| 117060 |
+
"grad_norm": 1.6197258234024048,
|
| 117061 |
+
"learning_rate": 6.997029546450084e-05,
|
| 117062 |
+
"loss": 2.1749,
|
| 117063 |
+
"step": 16626
|
| 117064 |
+
},
|
| 117065 |
+
{
|
| 117066 |
+
"epoch": 1.7925987817368336,
|
| 117067 |
+
"grad_norm": 1.6914771795272827,
|
| 117068 |
+
"learning_rate": 6.99595145994924e-05,
|
| 117069 |
+
"loss": 2.3734,
|
| 117070 |
+
"step": 16627
|
| 117071 |
+
},
|
| 117072 |
+
{
|
| 117073 |
+
"epoch": 1.7927065926365156,
|
| 117074 |
+
"grad_norm": 1.4003772735595703,
|
| 117075 |
+
"learning_rate": 6.994873411825601e-05,
|
| 117076 |
+
"loss": 1.5046,
|
| 117077 |
+
"step": 16628
|
| 117078 |
+
},
|
| 117079 |
+
{
|
| 117080 |
+
"epoch": 1.7928144035361975,
|
| 117081 |
+
"grad_norm": 1.4779349565505981,
|
| 117082 |
+
"learning_rate": 6.993795402092941e-05,
|
| 117083 |
+
"loss": 1.6976,
|
| 117084 |
+
"step": 16629
|
| 117085 |
+
},
|
| 117086 |
+
{
|
| 117087 |
+
"epoch": 1.7929222144358796,
|
| 117088 |
+
"grad_norm": 1.767088770866394,
|
| 117089 |
+
"learning_rate": 6.992717430765023e-05,
|
| 117090 |
+
"loss": 2.2091,
|
| 117091 |
+
"step": 16630
|
| 117092 |
+
},
|
| 117093 |
+
{
|
| 117094 |
+
"epoch": 1.7930300253355616,
|
| 117095 |
+
"grad_norm": 1.6406548023223877,
|
| 117096 |
+
"learning_rate": 6.991639497855629e-05,
|
| 117097 |
+
"loss": 1.8148,
|
| 117098 |
+
"step": 16631
|
| 117099 |
+
},
|
| 117100 |
+
{
|
| 117101 |
+
"epoch": 1.7931378362352435,
|
| 117102 |
+
"grad_norm": 1.393308162689209,
|
| 117103 |
+
"learning_rate": 6.990561603378524e-05,
|
| 117104 |
+
"loss": 1.3729,
|
| 117105 |
+
"step": 16632
|
| 117106 |
+
},
|
| 117107 |
+
{
|
| 117108 |
+
"epoch": 1.7932456471349254,
|
| 117109 |
+
"grad_norm": 1.7819479703903198,
|
| 117110 |
+
"learning_rate": 6.989483747347478e-05,
|
| 117111 |
+
"loss": 2.41,
|
| 117112 |
+
"step": 16633
|
| 117113 |
+
},
|
| 117114 |
+
{
|
| 117115 |
+
"epoch": 1.7933534580346073,
|
| 117116 |
+
"grad_norm": 1.5250705480575562,
|
| 117117 |
+
"learning_rate": 6.988405929776265e-05,
|
| 117118 |
+
"loss": 1.6387,
|
| 117119 |
+
"step": 16634
|
| 117120 |
+
},
|
| 117121 |
+
{
|
| 117122 |
+
"epoch": 1.7934612689342893,
|
| 117123 |
+
"grad_norm": 1.595254898071289,
|
| 117124 |
+
"learning_rate": 6.987328150678651e-05,
|
| 117125 |
+
"loss": 1.2466,
|
| 117126 |
+
"step": 16635
|
| 117127 |
+
},
|
| 117128 |
+
{
|
| 117129 |
+
"epoch": 1.7935690798339712,
|
| 117130 |
+
"grad_norm": 1.7022086381912231,
|
| 117131 |
+
"learning_rate": 6.986250410068402e-05,
|
| 117132 |
+
"loss": 1.8353,
|
| 117133 |
+
"step": 16636
|
| 117134 |
+
},
|
| 117135 |
+
{
|
| 117136 |
+
"epoch": 1.793676890733653,
|
| 117137 |
+
"grad_norm": 1.8545401096343994,
|
| 117138 |
+
"learning_rate": 6.985172707959295e-05,
|
| 117139 |
+
"loss": 2.1976,
|
| 117140 |
+
"step": 16637
|
| 117141 |
+
},
|
| 117142 |
+
{
|
| 117143 |
+
"epoch": 1.793784701633335,
|
| 117144 |
+
"grad_norm": 1.6884146928787231,
|
| 117145 |
+
"learning_rate": 6.984095044365092e-05,
|
| 117146 |
+
"loss": 2.2716,
|
| 117147 |
+
"step": 16638
|
| 117148 |
+
},
|
| 117149 |
+
{
|
| 117150 |
+
"epoch": 1.793892512533017,
|
| 117151 |
+
"grad_norm": 1.5139479637145996,
|
| 117152 |
+
"learning_rate": 6.983017419299557e-05,
|
| 117153 |
+
"loss": 1.7601,
|
| 117154 |
+
"step": 16639
|
| 117155 |
+
},
|
| 117156 |
+
{
|
| 117157 |
+
"epoch": 1.7940003234326989,
|
| 117158 |
+
"grad_norm": 1.7244209051132202,
|
| 117159 |
+
"learning_rate": 6.981939832776464e-05,
|
| 117160 |
+
"loss": 1.9901,
|
| 117161 |
+
"step": 16640
|
| 117162 |
+
},
|
| 117163 |
+
{
|
| 117164 |
+
"epoch": 1.794108134332381,
|
| 117165 |
+
"grad_norm": 1.6403565406799316,
|
| 117166 |
+
"learning_rate": 6.980862284809575e-05,
|
| 117167 |
+
"loss": 1.9645,
|
| 117168 |
+
"step": 16641
|
| 117169 |
+
},
|
| 117170 |
+
{
|
| 117171 |
+
"epoch": 1.794215945232063,
|
| 117172 |
+
"grad_norm": 1.7007755041122437,
|
| 117173 |
+
"learning_rate": 6.979784775412659e-05,
|
| 117174 |
+
"loss": 1.7178,
|
| 117175 |
+
"step": 16642
|
| 117176 |
+
},
|
| 117177 |
+
{
|
| 117178 |
+
"epoch": 1.7943237561317449,
|
| 117179 |
+
"grad_norm": 1.489458680152893,
|
| 117180 |
+
"learning_rate": 6.978707304599475e-05,
|
| 117181 |
+
"loss": 1.662,
|
| 117182 |
+
"step": 16643
|
| 117183 |
+
},
|
| 117184 |
+
{
|
| 117185 |
+
"epoch": 1.794431567031427,
|
| 117186 |
+
"grad_norm": 1.5674692392349243,
|
| 117187 |
+
"learning_rate": 6.977629872383795e-05,
|
| 117188 |
+
"loss": 1.9158,
|
| 117189 |
+
"step": 16644
|
| 117190 |
+
},
|
| 117191 |
+
{
|
| 117192 |
+
"epoch": 1.794539377931109,
|
| 117193 |
+
"grad_norm": 1.600303053855896,
|
| 117194 |
+
"learning_rate": 6.976552478779381e-05,
|
| 117195 |
+
"loss": 2.2533,
|
| 117196 |
+
"step": 16645
|
| 117197 |
+
},
|
| 117198 |
+
{
|
| 117199 |
+
"epoch": 1.7946471888307909,
|
| 117200 |
+
"grad_norm": 1.416590929031372,
|
| 117201 |
+
"learning_rate": 6.975475123799993e-05,
|
| 117202 |
+
"loss": 1.5278,
|
| 117203 |
+
"step": 16646
|
| 117204 |
+
},
|
| 117205 |
+
{
|
| 117206 |
+
"epoch": 1.7947549997304728,
|
| 117207 |
+
"grad_norm": 1.7980154752731323,
|
| 117208 |
+
"learning_rate": 6.974397807459401e-05,
|
| 117209 |
+
"loss": 2.0584,
|
| 117210 |
+
"step": 16647
|
| 117211 |
+
},
|
| 117212 |
+
{
|
| 117213 |
+
"epoch": 1.7948628106301547,
|
| 117214 |
+
"grad_norm": 1.8561755418777466,
|
| 117215 |
+
"learning_rate": 6.973320529771365e-05,
|
| 117216 |
+
"loss": 1.947,
|
| 117217 |
+
"step": 16648
|
| 117218 |
+
},
|
| 117219 |
+
{
|
| 117220 |
+
"epoch": 1.7949706215298367,
|
| 117221 |
+
"grad_norm": 1.673102617263794,
|
| 117222 |
+
"learning_rate": 6.972243290749645e-05,
|
| 117223 |
+
"loss": 1.8987,
|
| 117224 |
+
"step": 16649
|
| 117225 |
+
},
|
| 117226 |
+
{
|
| 117227 |
+
"epoch": 1.7950784324295186,
|
| 117228 |
+
"grad_norm": 1.4816420078277588,
|
| 117229 |
+
"learning_rate": 6.971166090408005e-05,
|
| 117230 |
+
"loss": 1.7192,
|
| 117231 |
+
"step": 16650
|
| 117232 |
+
},
|
| 117233 |
+
{
|
| 117234 |
+
"epoch": 1.7951862433292005,
|
| 117235 |
+
"grad_norm": 1.7242459058761597,
|
| 117236 |
+
"learning_rate": 6.97008892876021e-05,
|
| 117237 |
+
"loss": 1.9235,
|
| 117238 |
+
"step": 16651
|
| 117239 |
+
},
|
| 117240 |
+
{
|
| 117241 |
+
"epoch": 1.7952940542288824,
|
| 117242 |
+
"grad_norm": 1.5770617723464966,
|
| 117243 |
+
"learning_rate": 6.969011805820014e-05,
|
| 117244 |
+
"loss": 1.9594,
|
| 117245 |
+
"step": 16652
|
| 117246 |
+
},
|
| 117247 |
+
{
|
| 117248 |
+
"epoch": 1.7954018651285644,
|
| 117249 |
+
"grad_norm": 1.7428151369094849,
|
| 117250 |
+
"learning_rate": 6.96793472160118e-05,
|
| 117251 |
+
"loss": 1.8302,
|
| 117252 |
+
"step": 16653
|
| 117253 |
+
},
|
| 117254 |
+
{
|
| 117255 |
+
"epoch": 1.7955096760282463,
|
| 117256 |
+
"grad_norm": 1.6318590641021729,
|
| 117257 |
+
"learning_rate": 6.96685767611747e-05,
|
| 117258 |
+
"loss": 2.3796,
|
| 117259 |
+
"step": 16654
|
| 117260 |
+
},
|
| 117261 |
+
{
|
| 117262 |
+
"epoch": 1.7956174869279284,
|
| 117263 |
+
"grad_norm": 1.3714919090270996,
|
| 117264 |
+
"learning_rate": 6.965780669382642e-05,
|
| 117265 |
+
"loss": 1.4396,
|
| 117266 |
+
"step": 16655
|
| 117267 |
+
},
|
| 117268 |
+
{
|
| 117269 |
+
"epoch": 1.7957252978276104,
|
| 117270 |
+
"grad_norm": 1.708219051361084,
|
| 117271 |
+
"learning_rate": 6.964703701410456e-05,
|
| 117272 |
+
"loss": 2.1678,
|
| 117273 |
+
"step": 16656
|
| 117274 |
+
},
|
| 117275 |
+
{
|
| 117276 |
+
"epoch": 1.7958331087272923,
|
| 117277 |
+
"grad_norm": 1.5859949588775635,
|
| 117278 |
+
"learning_rate": 6.963626772214667e-05,
|
| 117279 |
+
"loss": 1.8029,
|
| 117280 |
+
"step": 16657
|
| 117281 |
+
},
|
| 117282 |
+
{
|
| 117283 |
+
"epoch": 1.7959409196269744,
|
| 117284 |
+
"grad_norm": 1.7511671781539917,
|
| 117285 |
+
"learning_rate": 6.962549881809039e-05,
|
| 117286 |
+
"loss": 1.8144,
|
| 117287 |
+
"step": 16658
|
| 117288 |
+
},
|
| 117289 |
+
{
|
| 117290 |
+
"epoch": 1.7960487305266564,
|
| 117291 |
+
"grad_norm": 1.5485838651657104,
|
| 117292 |
+
"learning_rate": 6.96147303020732e-05,
|
| 117293 |
+
"loss": 1.9754,
|
| 117294 |
+
"step": 16659
|
| 117295 |
+
},
|
| 117296 |
+
{
|
| 117297 |
+
"epoch": 1.7961565414263383,
|
| 117298 |
+
"grad_norm": 1.6231766939163208,
|
| 117299 |
+
"learning_rate": 6.960396217423279e-05,
|
| 117300 |
+
"loss": 2.3127,
|
| 117301 |
+
"step": 16660
|
| 117302 |
+
},
|
| 117303 |
+
{
|
| 117304 |
+
"epoch": 1.7962643523260202,
|
| 117305 |
+
"grad_norm": 1.6254571676254272,
|
| 117306 |
+
"learning_rate": 6.959319443470664e-05,
|
| 117307 |
+
"loss": 2.0988,
|
| 117308 |
+
"step": 16661
|
| 117309 |
+
},
|
| 117310 |
+
{
|
| 117311 |
+
"epoch": 1.7963721632257021,
|
| 117312 |
+
"grad_norm": 1.4293440580368042,
|
| 117313 |
+
"learning_rate": 6.95824270836323e-05,
|
| 117314 |
+
"loss": 1.6768,
|
| 117315 |
+
"step": 16662
|
| 117316 |
+
},
|
| 117317 |
+
{
|
| 117318 |
+
"epoch": 1.796479974125384,
|
| 117319 |
+
"grad_norm": 1.474360466003418,
|
| 117320 |
+
"learning_rate": 6.957166012114739e-05,
|
| 117321 |
+
"loss": 1.943,
|
| 117322 |
+
"step": 16663
|
| 117323 |
+
},
|
| 117324 |
+
{
|
| 117325 |
+
"epoch": 1.796587785025066,
|
| 117326 |
+
"grad_norm": 1.3896758556365967,
|
| 117327 |
+
"learning_rate": 6.956089354738943e-05,
|
| 117328 |
+
"loss": 1.5368,
|
| 117329 |
+
"step": 16664
|
| 117330 |
+
},
|
| 117331 |
+
{
|
| 117332 |
+
"epoch": 1.796695595924748,
|
| 117333 |
+
"grad_norm": 1.6258090734481812,
|
| 117334 |
+
"learning_rate": 6.955012736249598e-05,
|
| 117335 |
+
"loss": 1.8852,
|
| 117336 |
+
"step": 16665
|
| 117337 |
+
},
|
| 117338 |
+
{
|
| 117339 |
+
"epoch": 1.7968034068244298,
|
| 117340 |
+
"grad_norm": 1.6771920919418335,
|
| 117341 |
+
"learning_rate": 6.95393615666045e-05,
|
| 117342 |
+
"loss": 2.0219,
|
| 117343 |
+
"step": 16666
|
| 117344 |
+
},
|
| 117345 |
+
{
|
| 117346 |
+
"epoch": 1.7969112177241118,
|
| 117347 |
+
"grad_norm": 1.5077306032180786,
|
| 117348 |
+
"learning_rate": 6.952859615985263e-05,
|
| 117349 |
+
"loss": 1.5201,
|
| 117350 |
+
"step": 16667
|
| 117351 |
+
},
|
| 117352 |
+
{
|
| 117353 |
+
"epoch": 1.7970190286237937,
|
| 117354 |
+
"grad_norm": 1.7441203594207764,
|
| 117355 |
+
"learning_rate": 6.951783114237787e-05,
|
| 117356 |
+
"loss": 2.258,
|
| 117357 |
+
"step": 16668
|
| 117358 |
+
},
|
| 117359 |
+
{
|
| 117360 |
+
"epoch": 1.7971268395234758,
|
| 117361 |
+
"grad_norm": 1.701608657836914,
|
| 117362 |
+
"learning_rate": 6.950706651431768e-05,
|
| 117363 |
+
"loss": 2.1716,
|
| 117364 |
+
"step": 16669
|
| 117365 |
+
},
|
| 117366 |
+
{
|
| 117367 |
+
"epoch": 1.7972346504231578,
|
| 117368 |
+
"grad_norm": 1.659839391708374,
|
| 117369 |
+
"learning_rate": 6.949630227580965e-05,
|
| 117370 |
+
"loss": 2.3214,
|
| 117371 |
+
"step": 16670
|
| 117372 |
+
},
|
| 117373 |
+
{
|
| 117374 |
+
"epoch": 1.7973424613228397,
|
| 117375 |
+
"grad_norm": 1.547916293144226,
|
| 117376 |
+
"learning_rate": 6.94855384269913e-05,
|
| 117377 |
+
"loss": 1.8496,
|
| 117378 |
+
"step": 16671
|
| 117379 |
+
},
|
| 117380 |
+
{
|
| 117381 |
+
"epoch": 1.7974502722225218,
|
| 117382 |
+
"grad_norm": 1.908344030380249,
|
| 117383 |
+
"learning_rate": 6.947477496800006e-05,
|
| 117384 |
+
"loss": 2.1804,
|
| 117385 |
+
"step": 16672
|
| 117386 |
+
},
|
| 117387 |
+
{
|
| 117388 |
+
"epoch": 1.7975580831222038,
|
| 117389 |
+
"grad_norm": 1.5153648853302002,
|
| 117390 |
+
"learning_rate": 6.946401189897353e-05,
|
| 117391 |
+
"loss": 1.4403,
|
| 117392 |
+
"step": 16673
|
| 117393 |
+
},
|
| 117394 |
+
{
|
| 117395 |
+
"epoch": 1.7976658940218857,
|
| 117396 |
+
"grad_norm": 1.6658467054367065,
|
| 117397 |
+
"learning_rate": 6.94532492200492e-05,
|
| 117398 |
+
"loss": 1.8435,
|
| 117399 |
+
"step": 16674
|
| 117400 |
+
},
|
| 117401 |
+
{
|
| 117402 |
+
"epoch": 1.7977737049215676,
|
| 117403 |
+
"grad_norm": 1.5365303754806519,
|
| 117404 |
+
"learning_rate": 6.944248693136451e-05,
|
| 117405 |
+
"loss": 1.6988,
|
| 117406 |
+
"step": 16675
|
| 117407 |
+
},
|
| 117408 |
+
{
|
| 117409 |
+
"epoch": 1.7978815158212496,
|
| 117410 |
+
"grad_norm": 1.8733201026916504,
|
| 117411 |
+
"learning_rate": 6.943172503305694e-05,
|
| 117412 |
+
"loss": 2.2118,
|
| 117413 |
+
"step": 16676
|
| 117414 |
+
},
|
| 117415 |
+
{
|
| 117416 |
+
"epoch": 1.7979893267209315,
|
| 117417 |
+
"grad_norm": 1.9925936460494995,
|
| 117418 |
+
"learning_rate": 6.942096352526403e-05,
|
| 117419 |
+
"loss": 2.1479,
|
| 117420 |
+
"step": 16677
|
| 117421 |
+
},
|
| 117422 |
+
{
|
| 117423 |
+
"epoch": 1.7980971376206134,
|
| 117424 |
+
"grad_norm": 1.9079327583312988,
|
| 117425 |
+
"learning_rate": 6.941020240812326e-05,
|
| 117426 |
+
"loss": 2.3212,
|
| 117427 |
+
"step": 16678
|
| 117428 |
+
},
|
| 117429 |
+
{
|
| 117430 |
+
"epoch": 1.7982049485202953,
|
| 117431 |
+
"grad_norm": 1.6285349130630493,
|
| 117432 |
+
"learning_rate": 6.939944168177204e-05,
|
| 117433 |
+
"loss": 1.7294,
|
| 117434 |
+
"step": 16679
|
| 117435 |
+
},
|
| 117436 |
+
{
|
| 117437 |
+
"epoch": 1.7983127594199773,
|
| 117438 |
+
"grad_norm": 1.6742137670516968,
|
| 117439 |
+
"learning_rate": 6.938868134634793e-05,
|
| 117440 |
+
"loss": 2.1908,
|
| 117441 |
+
"step": 16680
|
| 117442 |
+
},
|
| 117443 |
+
{
|
| 117444 |
+
"epoch": 1.7984205703196592,
|
| 117445 |
+
"grad_norm": 1.825148344039917,
|
| 117446 |
+
"learning_rate": 6.937792140198834e-05,
|
| 117447 |
+
"loss": 2.325,
|
| 117448 |
+
"step": 16681
|
| 117449 |
+
},
|
| 117450 |
+
{
|
| 117451 |
+
"epoch": 1.798528381219341,
|
| 117452 |
+
"grad_norm": 1.7976876497268677,
|
| 117453 |
+
"learning_rate": 6.936716184883071e-05,
|
| 117454 |
+
"loss": 1.8502,
|
| 117455 |
+
"step": 16682
|
| 117456 |
+
},
|
| 117457 |
+
{
|
| 117458 |
+
"epoch": 1.7986361921190233,
|
| 117459 |
+
"grad_norm": 1.6656286716461182,
|
| 117460 |
+
"learning_rate": 6.935640268701255e-05,
|
| 117461 |
+
"loss": 1.7964,
|
| 117462 |
+
"step": 16683
|
| 117463 |
+
},
|
| 117464 |
+
{
|
| 117465 |
+
"epoch": 1.7987440030187052,
|
| 117466 |
+
"grad_norm": 1.7943233251571655,
|
| 117467 |
+
"learning_rate": 6.93456439166713e-05,
|
| 117468 |
+
"loss": 1.7962,
|
| 117469 |
+
"step": 16684
|
| 117470 |
+
},
|
| 117471 |
+
{
|
| 117472 |
+
"epoch": 1.798851813918387,
|
| 117473 |
+
"grad_norm": 1.691943645477295,
|
| 117474 |
+
"learning_rate": 6.933488553794436e-05,
|
| 117475 |
+
"loss": 2.0101,
|
| 117476 |
+
"step": 16685
|
| 117477 |
+
},
|
| 117478 |
+
{
|
| 117479 |
+
"epoch": 1.7989596248180693,
|
| 117480 |
+
"grad_norm": 1.6314573287963867,
|
| 117481 |
+
"learning_rate": 6.932412755096919e-05,
|
| 117482 |
+
"loss": 1.5319,
|
| 117483 |
+
"step": 16686
|
| 117484 |
+
},
|
| 117485 |
+
{
|
| 117486 |
+
"epoch": 1.7990674357177512,
|
| 117487 |
+
"grad_norm": 1.734278678894043,
|
| 117488 |
+
"learning_rate": 6.931336995588329e-05,
|
| 117489 |
+
"loss": 2.1258,
|
| 117490 |
+
"step": 16687
|
| 117491 |
+
},
|
| 117492 |
+
{
|
| 117493 |
+
"epoch": 1.799175246617433,
|
| 117494 |
+
"grad_norm": 1.692072868347168,
|
| 117495 |
+
"learning_rate": 6.930261275282402e-05,
|
| 117496 |
+
"loss": 1.8127,
|
| 117497 |
+
"step": 16688
|
| 117498 |
+
},
|
| 117499 |
+
{
|
| 117500 |
+
"epoch": 1.799283057517115,
|
| 117501 |
+
"grad_norm": 1.6133331060409546,
|
| 117502 |
+
"learning_rate": 6.929185594192877e-05,
|
| 117503 |
+
"loss": 1.6538,
|
| 117504 |
+
"step": 16689
|
| 117505 |
+
},
|
| 117506 |
+
{
|
| 117507 |
+
"epoch": 1.799390868416797,
|
| 117508 |
+
"grad_norm": 1.7636916637420654,
|
| 117509 |
+
"learning_rate": 6.928109952333505e-05,
|
| 117510 |
+
"loss": 2.2695,
|
| 117511 |
+
"step": 16690
|
| 117512 |
+
},
|
| 117513 |
+
{
|
| 117514 |
+
"epoch": 1.7994986793164789,
|
| 117515 |
+
"grad_norm": 1.8702751398086548,
|
| 117516 |
+
"learning_rate": 6.927034349718022e-05,
|
| 117517 |
+
"loss": 2.2189,
|
| 117518 |
+
"step": 16691
|
| 117519 |
+
},
|
| 117520 |
+
{
|
| 117521 |
+
"epoch": 1.7996064902161608,
|
| 117522 |
+
"grad_norm": 1.7277159690856934,
|
| 117523 |
+
"learning_rate": 6.925958786360172e-05,
|
| 117524 |
+
"loss": 1.8473,
|
| 117525 |
+
"step": 16692
|
| 117526 |
+
},
|
| 117527 |
+
{
|
| 117528 |
+
"epoch": 1.7997143011158427,
|
| 117529 |
+
"grad_norm": 1.6851154565811157,
|
| 117530 |
+
"learning_rate": 6.924883262273691e-05,
|
| 117531 |
+
"loss": 2.0406,
|
| 117532 |
+
"step": 16693
|
| 117533 |
+
},
|
| 117534 |
+
{
|
| 117535 |
+
"epoch": 1.7998221120155247,
|
| 117536 |
+
"grad_norm": 1.64437997341156,
|
| 117537 |
+
"learning_rate": 6.923807777472327e-05,
|
| 117538 |
+
"loss": 1.6798,
|
| 117539 |
+
"step": 16694
|
| 117540 |
+
},
|
| 117541 |
+
{
|
| 117542 |
+
"epoch": 1.7999299229152066,
|
| 117543 |
+
"grad_norm": 1.562111735343933,
|
| 117544 |
+
"learning_rate": 6.922732331969807e-05,
|
| 117545 |
+
"loss": 1.8983,
|
| 117546 |
+
"step": 16695
|
| 117547 |
+
},
|
| 117548 |
+
{
|
| 117549 |
+
"epoch": 1.8000377338148887,
|
| 117550 |
+
"grad_norm": 1.4247664213180542,
|
| 117551 |
+
"learning_rate": 6.921656925779884e-05,
|
| 117552 |
+
"loss": 1.8932,
|
| 117553 |
+
"step": 16696
|
| 117554 |
+
},
|
| 117555 |
+
{
|
| 117556 |
+
"epoch": 1.8001455447145707,
|
| 117557 |
+
"grad_norm": 1.8603711128234863,
|
| 117558 |
+
"learning_rate": 6.920581558916291e-05,
|
| 117559 |
+
"loss": 2.2826,
|
| 117560 |
+
"step": 16697
|
| 117561 |
+
},
|
| 117562 |
+
{
|
| 117563 |
+
"epoch": 1.8002533556142526,
|
| 117564 |
+
"grad_norm": 1.4201916456222534,
|
| 117565 |
+
"learning_rate": 6.919506231392761e-05,
|
| 117566 |
+
"loss": 1.2012,
|
| 117567 |
+
"step": 16698
|
| 117568 |
+
},
|
| 117569 |
+
{
|
| 117570 |
+
"epoch": 1.8003611665139345,
|
| 117571 |
+
"grad_norm": 1.7984966039657593,
|
| 117572 |
+
"learning_rate": 6.918430943223035e-05,
|
| 117573 |
+
"loss": 2.1972,
|
| 117574 |
+
"step": 16699
|
| 117575 |
+
},
|
| 117576 |
+
{
|
| 117577 |
+
"epoch": 1.8004689774136167,
|
| 117578 |
+
"grad_norm": 1.6121435165405273,
|
| 117579 |
+
"learning_rate": 6.917355694420851e-05,
|
| 117580 |
+
"loss": 1.9988,
|
| 117581 |
+
"step": 16700
|
| 117582 |
+
},
|
| 117583 |
+
{
|
| 117584 |
+
"epoch": 1.8005767883132986,
|
| 117585 |
+
"grad_norm": 1.476011037826538,
|
| 117586 |
+
"learning_rate": 6.916280484999946e-05,
|
| 117587 |
+
"loss": 1.5434,
|
| 117588 |
+
"step": 16701
|
| 117589 |
+
},
|
| 117590 |
+
{
|
| 117591 |
+
"epoch": 1.8006845992129805,
|
| 117592 |
+
"grad_norm": 1.7522236108779907,
|
| 117593 |
+
"learning_rate": 6.915205314974053e-05,
|
| 117594 |
+
"loss": 2.1333,
|
| 117595 |
+
"step": 16702
|
| 117596 |
+
},
|
| 117597 |
+
{
|
| 117598 |
+
"epoch": 1.8007924101126624,
|
| 117599 |
+
"grad_norm": 1.8870905637741089,
|
| 117600 |
+
"learning_rate": 6.914130184356912e-05,
|
| 117601 |
+
"loss": 2.3645,
|
| 117602 |
+
"step": 16703
|
| 117603 |
+
},
|
| 117604 |
+
{
|
| 117605 |
+
"epoch": 1.8009002210123444,
|
| 117606 |
+
"grad_norm": 1.8422532081604004,
|
| 117607 |
+
"learning_rate": 6.913055093162254e-05,
|
| 117608 |
+
"loss": 1.8766,
|
| 117609 |
+
"step": 16704
|
| 117610 |
+
},
|
| 117611 |
+
{
|
| 117612 |
+
"epoch": 1.8010080319120263,
|
| 117613 |
+
"grad_norm": 1.4395442008972168,
|
| 117614 |
+
"learning_rate": 6.911980041403812e-05,
|
| 117615 |
+
"loss": 1.8167,
|
| 117616 |
+
"step": 16705
|
| 117617 |
+
},
|
| 117618 |
+
{
|
| 117619 |
+
"epoch": 1.8011158428117082,
|
| 117620 |
+
"grad_norm": 1.617555022239685,
|
| 117621 |
+
"learning_rate": 6.910905029095328e-05,
|
| 117622 |
+
"loss": 1.6376,
|
| 117623 |
+
"step": 16706
|
| 117624 |
+
},
|
| 117625 |
+
{
|
| 117626 |
+
"epoch": 1.8012236537113901,
|
| 117627 |
+
"grad_norm": 1.5927187204360962,
|
| 117628 |
+
"learning_rate": 6.909830056250527e-05,
|
| 117629 |
+
"loss": 1.9753,
|
| 117630 |
+
"step": 16707
|
| 117631 |
+
},
|
| 117632 |
+
{
|
| 117633 |
+
"epoch": 1.801331464611072,
|
| 117634 |
+
"grad_norm": 1.6366825103759766,
|
| 117635 |
+
"learning_rate": 6.908755122883143e-05,
|
| 117636 |
+
"loss": 1.9726,
|
| 117637 |
+
"step": 16708
|
| 117638 |
+
},
|
| 117639 |
+
{
|
| 117640 |
+
"epoch": 1.801439275510754,
|
| 117641 |
+
"grad_norm": 1.3605437278747559,
|
| 117642 |
+
"learning_rate": 6.907680229006914e-05,
|
| 117643 |
+
"loss": 1.3976,
|
| 117644 |
+
"step": 16709
|
| 117645 |
+
},
|
| 117646 |
+
{
|
| 117647 |
+
"epoch": 1.8015470864104361,
|
| 117648 |
+
"grad_norm": 1.739443302154541,
|
| 117649 |
+
"learning_rate": 6.906605374635569e-05,
|
| 117650 |
+
"loss": 1.4656,
|
| 117651 |
+
"step": 16710
|
| 117652 |
+
},
|
| 117653 |
+
{
|
| 117654 |
+
"epoch": 1.801654897310118,
|
| 117655 |
+
"grad_norm": 1.7897757291793823,
|
| 117656 |
+
"learning_rate": 6.90553055978284e-05,
|
| 117657 |
+
"loss": 2.1058,
|
| 117658 |
+
"step": 16711
|
| 117659 |
+
},
|
| 117660 |
+
{
|
| 117661 |
+
"epoch": 1.8017627082098,
|
| 117662 |
+
"grad_norm": 1.3502769470214844,
|
| 117663 |
+
"learning_rate": 6.904455784462451e-05,
|
| 117664 |
+
"loss": 1.3379,
|
| 117665 |
+
"step": 16712
|
| 117666 |
+
},
|
| 117667 |
+
{
|
| 117668 |
+
"epoch": 1.801870519109482,
|
| 117669 |
+
"grad_norm": 1.6332180500030518,
|
| 117670 |
+
"learning_rate": 6.903381048688144e-05,
|
| 117671 |
+
"loss": 1.8668,
|
| 117672 |
+
"step": 16713
|
| 117673 |
+
},
|
| 117674 |
+
{
|
| 117675 |
+
"epoch": 1.801978330009164,
|
| 117676 |
+
"grad_norm": 1.7011054754257202,
|
| 117677 |
+
"learning_rate": 6.90230635247364e-05,
|
| 117678 |
+
"loss": 2.3079,
|
| 117679 |
+
"step": 16714
|
| 117680 |
+
},
|
| 117681 |
+
{
|
| 117682 |
+
"epoch": 1.802086140908846,
|
| 117683 |
+
"grad_norm": 1.5191792249679565,
|
| 117684 |
+
"learning_rate": 6.90123169583267e-05,
|
| 117685 |
+
"loss": 2.0825,
|
| 117686 |
+
"step": 16715
|
| 117687 |
+
},
|
| 117688 |
+
{
|
| 117689 |
+
"epoch": 1.802193951808528,
|
| 117690 |
+
"grad_norm": 1.6270371675491333,
|
| 117691 |
+
"learning_rate": 6.900157078778969e-05,
|
| 117692 |
+
"loss": 1.9757,
|
| 117693 |
+
"step": 16716
|
| 117694 |
+
},
|
| 117695 |
+
{
|
| 117696 |
+
"epoch": 1.8023017627082099,
|
| 117697 |
+
"grad_norm": 1.386960506439209,
|
| 117698 |
+
"learning_rate": 6.899082501326261e-05,
|
| 117699 |
+
"loss": 1.411,
|
| 117700 |
+
"step": 16717
|
| 117701 |
+
},
|
| 117702 |
+
{
|
| 117703 |
+
"epoch": 1.8024095736078918,
|
| 117704 |
+
"grad_norm": 1.5525670051574707,
|
| 117705 |
+
"learning_rate": 6.898007963488269e-05,
|
| 117706 |
+
"loss": 1.8056,
|
| 117707 |
+
"step": 16718
|
| 117708 |
+
},
|
| 117709 |
+
{
|
| 117710 |
+
"epoch": 1.8025173845075737,
|
| 117711 |
+
"grad_norm": 1.6627105474472046,
|
| 117712 |
+
"learning_rate": 6.89693346527873e-05,
|
| 117713 |
+
"loss": 1.8423,
|
| 117714 |
+
"step": 16719
|
| 117715 |
+
},
|
| 117716 |
+
{
|
| 117717 |
+
"epoch": 1.8026251954072556,
|
| 117718 |
+
"grad_norm": 1.5929796695709229,
|
| 117719 |
+
"learning_rate": 6.895859006711365e-05,
|
| 117720 |
+
"loss": 1.3075,
|
| 117721 |
+
"step": 16720
|
| 117722 |
+
},
|
| 117723 |
+
{
|
| 117724 |
+
"epoch": 1.8027330063069376,
|
| 117725 |
+
"grad_norm": 1.6167912483215332,
|
| 117726 |
+
"learning_rate": 6.894784587799901e-05,
|
| 117727 |
+
"loss": 1.9247,
|
| 117728 |
+
"step": 16721
|
| 117729 |
+
},
|
| 117730 |
+
{
|
| 117731 |
+
"epoch": 1.8028408172066195,
|
| 117732 |
+
"grad_norm": 2.0499706268310547,
|
| 117733 |
+
"learning_rate": 6.893710208558061e-05,
|
| 117734 |
+
"loss": 2.0098,
|
| 117735 |
+
"step": 16722
|
| 117736 |
+
},
|
| 117737 |
+
{
|
| 117738 |
+
"epoch": 1.8029486281063014,
|
| 117739 |
+
"grad_norm": 1.7318973541259766,
|
| 117740 |
+
"learning_rate": 6.892635868999578e-05,
|
| 117741 |
+
"loss": 1.8704,
|
| 117742 |
+
"step": 16723
|
| 117743 |
+
},
|
| 117744 |
+
{
|
| 117745 |
+
"epoch": 1.8030564390059836,
|
| 117746 |
+
"grad_norm": 1.7728049755096436,
|
| 117747 |
+
"learning_rate": 6.891561569138173e-05,
|
| 117748 |
+
"loss": 2.0986,
|
| 117749 |
+
"step": 16724
|
| 117750 |
+
},
|
| 117751 |
+
{
|
| 117752 |
+
"epoch": 1.8031642499056655,
|
| 117753 |
+
"grad_norm": 1.5714805126190186,
|
| 117754 |
+
"learning_rate": 6.890487308987567e-05,
|
| 117755 |
+
"loss": 1.554,
|
| 117756 |
+
"step": 16725
|
| 117757 |
+
},
|
| 117758 |
+
{
|
| 117759 |
+
"epoch": 1.8032720608053474,
|
| 117760 |
+
"grad_norm": 1.5196635723114014,
|
| 117761 |
+
"learning_rate": 6.88941308856149e-05,
|
| 117762 |
+
"loss": 1.5111,
|
| 117763 |
+
"step": 16726
|
| 117764 |
+
},
|
| 117765 |
+
{
|
| 117766 |
+
"epoch": 1.8033798717050293,
|
| 117767 |
+
"grad_norm": 1.8843687772750854,
|
| 117768 |
+
"learning_rate": 6.888338907873662e-05,
|
| 117769 |
+
"loss": 2.74,
|
| 117770 |
+
"step": 16727
|
| 117771 |
+
},
|
| 117772 |
+
{
|
| 117773 |
+
"epoch": 1.8034876826047115,
|
| 117774 |
+
"grad_norm": 1.6899522542953491,
|
| 117775 |
+
"learning_rate": 6.887264766937801e-05,
|
| 117776 |
+
"loss": 1.8436,
|
| 117777 |
+
"step": 16728
|
| 117778 |
+
},
|
| 117779 |
+
{
|
| 117780 |
+
"epoch": 1.8035954935043934,
|
| 117781 |
+
"grad_norm": 1.702290415763855,
|
| 117782 |
+
"learning_rate": 6.886190665767638e-05,
|
| 117783 |
+
"loss": 1.8139,
|
| 117784 |
+
"step": 16729
|
| 117785 |
+
},
|
| 117786 |
+
{
|
| 117787 |
+
"epoch": 1.8037033044040753,
|
| 117788 |
+
"grad_norm": 1.594366192817688,
|
| 117789 |
+
"learning_rate": 6.885116604376891e-05,
|
| 117790 |
+
"loss": 1.8393,
|
| 117791 |
+
"step": 16730
|
| 117792 |
+
},
|
| 117793 |
+
{
|
| 117794 |
+
"epoch": 1.8038111153037573,
|
| 117795 |
+
"grad_norm": 1.6726011037826538,
|
| 117796 |
+
"learning_rate": 6.884042582779284e-05,
|
| 117797 |
+
"loss": 2.0486,
|
| 117798 |
+
"step": 16731
|
| 117799 |
+
},
|
| 117800 |
+
{
|
| 117801 |
+
"epoch": 1.8039189262034392,
|
| 117802 |
+
"grad_norm": 1.5996594429016113,
|
| 117803 |
+
"learning_rate": 6.882968600988529e-05,
|
| 117804 |
+
"loss": 2.0189,
|
| 117805 |
+
"step": 16732
|
| 117806 |
+
},
|
| 117807 |
+
{
|
| 117808 |
+
"epoch": 1.8040267371031211,
|
| 117809 |
+
"grad_norm": 1.741853952407837,
|
| 117810 |
+
"learning_rate": 6.881894659018359e-05,
|
| 117811 |
+
"loss": 1.9873,
|
| 117812 |
+
"step": 16733
|
| 117813 |
+
},
|
| 117814 |
+
{
|
| 117815 |
+
"epoch": 1.804134548002803,
|
| 117816 |
+
"grad_norm": 1.5422440767288208,
|
| 117817 |
+
"learning_rate": 6.880820756882486e-05,
|
| 117818 |
+
"loss": 1.6827,
|
| 117819 |
+
"step": 16734
|
| 117820 |
+
},
|
| 117821 |
+
{
|
| 117822 |
+
"epoch": 1.804242358902485,
|
| 117823 |
+
"grad_norm": 1.3859449625015259,
|
| 117824 |
+
"learning_rate": 6.879746894594626e-05,
|
| 117825 |
+
"loss": 1.8771,
|
| 117826 |
+
"step": 16735
|
| 117827 |
+
},
|
| 117828 |
+
{
|
| 117829 |
+
"epoch": 1.804350169802167,
|
| 117830 |
+
"grad_norm": 1.8384952545166016,
|
| 117831 |
+
"learning_rate": 6.878673072168504e-05,
|
| 117832 |
+
"loss": 2.4136,
|
| 117833 |
+
"step": 16736
|
| 117834 |
+
},
|
| 117835 |
+
{
|
| 117836 |
+
"epoch": 1.8044579807018488,
|
| 117837 |
+
"grad_norm": 1.6659585237503052,
|
| 117838 |
+
"learning_rate": 6.87759928961784e-05,
|
| 117839 |
+
"loss": 1.9412,
|
| 117840 |
+
"step": 16737
|
| 117841 |
+
},
|
| 117842 |
+
{
|
| 117843 |
+
"epoch": 1.804565791601531,
|
| 117844 |
+
"grad_norm": 1.659032940864563,
|
| 117845 |
+
"learning_rate": 6.876525546956344e-05,
|
| 117846 |
+
"loss": 1.7517,
|
| 117847 |
+
"step": 16738
|
| 117848 |
+
},
|
| 117849 |
+
{
|
| 117850 |
+
"epoch": 1.804673602501213,
|
| 117851 |
+
"grad_norm": 1.5917338132858276,
|
| 117852 |
+
"learning_rate": 6.875451844197742e-05,
|
| 117853 |
+
"loss": 1.4629,
|
| 117854 |
+
"step": 16739
|
| 117855 |
+
},
|
| 117856 |
+
{
|
| 117857 |
+
"epoch": 1.8047814134008948,
|
| 117858 |
+
"grad_norm": 1.6696293354034424,
|
| 117859 |
+
"learning_rate": 6.874378181355744e-05,
|
| 117860 |
+
"loss": 2.1182,
|
| 117861 |
+
"step": 16740
|
| 117862 |
+
},
|
| 117863 |
+
{
|
| 117864 |
+
"epoch": 1.8048892243005767,
|
| 117865 |
+
"grad_norm": 1.7862634658813477,
|
| 117866 |
+
"learning_rate": 6.873304558444068e-05,
|
| 117867 |
+
"loss": 2.1774,
|
| 117868 |
+
"step": 16741
|
| 117869 |
+
},
|
| 117870 |
+
{
|
| 117871 |
+
"epoch": 1.804997035200259,
|
| 117872 |
+
"grad_norm": 1.7081775665283203,
|
| 117873 |
+
"learning_rate": 6.872230975476432e-05,
|
| 117874 |
+
"loss": 2.3677,
|
| 117875 |
+
"step": 16742
|
| 117876 |
+
},
|
| 117877 |
+
{
|
| 117878 |
+
"epoch": 1.8051048460999408,
|
| 117879 |
+
"grad_norm": 1.6936722993850708,
|
| 117880 |
+
"learning_rate": 6.871157432466548e-05,
|
| 117881 |
+
"loss": 1.8403,
|
| 117882 |
+
"step": 16743
|
| 117883 |
+
},
|
| 117884 |
+
{
|
| 117885 |
+
"epoch": 1.8052126569996227,
|
| 117886 |
+
"grad_norm": 1.7523459196090698,
|
| 117887 |
+
"learning_rate": 6.870083929428132e-05,
|
| 117888 |
+
"loss": 1.9212,
|
| 117889 |
+
"step": 16744
|
| 117890 |
+
},
|
| 117891 |
+
{
|
| 117892 |
+
"epoch": 1.8053204678993047,
|
| 117893 |
+
"grad_norm": 1.7130695581436157,
|
| 117894 |
+
"learning_rate": 6.869010466374896e-05,
|
| 117895 |
+
"loss": 2.2325,
|
| 117896 |
+
"step": 16745
|
| 117897 |
+
},
|
| 117898 |
+
{
|
| 117899 |
+
"epoch": 1.8054282787989866,
|
| 117900 |
+
"grad_norm": 1.6206870079040527,
|
| 117901 |
+
"learning_rate": 6.867937043320562e-05,
|
| 117902 |
+
"loss": 1.8481,
|
| 117903 |
+
"step": 16746
|
| 117904 |
+
},
|
| 117905 |
+
{
|
| 117906 |
+
"epoch": 1.8055360896986685,
|
| 117907 |
+
"grad_norm": 1.7568542957305908,
|
| 117908 |
+
"learning_rate": 6.866863660278835e-05,
|
| 117909 |
+
"loss": 2.1993,
|
| 117910 |
+
"step": 16747
|
| 117911 |
+
},
|
| 117912 |
+
{
|
| 117913 |
+
"epoch": 1.8056439005983504,
|
| 117914 |
+
"grad_norm": 1.5495140552520752,
|
| 117915 |
+
"learning_rate": 6.865790317263425e-05,
|
| 117916 |
+
"loss": 1.8134,
|
| 117917 |
+
"step": 16748
|
| 117918 |
+
},
|
| 117919 |
+
{
|
| 117920 |
+
"epoch": 1.8057517114980324,
|
| 117921 |
+
"grad_norm": 1.7852915525436401,
|
| 117922 |
+
"learning_rate": 6.864717014288055e-05,
|
| 117923 |
+
"loss": 2.4557,
|
| 117924 |
+
"step": 16749
|
| 117925 |
+
},
|
| 117926 |
+
{
|
| 117927 |
+
"epoch": 1.8058595223977143,
|
| 117928 |
+
"grad_norm": 1.5428017377853394,
|
| 117929 |
+
"learning_rate": 6.863643751366427e-05,
|
| 117930 |
+
"loss": 1.9302,
|
| 117931 |
+
"step": 16750
|
| 117932 |
+
},
|
| 117933 |
+
{
|
| 117934 |
+
"epoch": 1.8059673332973962,
|
| 117935 |
+
"grad_norm": 1.741347074508667,
|
| 117936 |
+
"learning_rate": 6.862570528512256e-05,
|
| 117937 |
+
"loss": 2.0634,
|
| 117938 |
+
"step": 16751
|
| 117939 |
+
},
|
| 117940 |
+
{
|
| 117941 |
+
"epoch": 1.8060751441970784,
|
| 117942 |
+
"grad_norm": 1.349820852279663,
|
| 117943 |
+
"learning_rate": 6.861497345739251e-05,
|
| 117944 |
+
"loss": 1.5542,
|
| 117945 |
+
"step": 16752
|
| 117946 |
+
},
|
| 117947 |
+
{
|
| 117948 |
+
"epoch": 1.8061829550967603,
|
| 117949 |
+
"grad_norm": 1.7314796447753906,
|
| 117950 |
+
"learning_rate": 6.860424203061126e-05,
|
| 117951 |
+
"loss": 2.1364,
|
| 117952 |
+
"step": 16753
|
| 117953 |
+
},
|
| 117954 |
+
{
|
| 117955 |
+
"epoch": 1.8062907659964422,
|
| 117956 |
+
"grad_norm": 1.6218026876449585,
|
| 117957 |
+
"learning_rate": 6.859351100491588e-05,
|
| 117958 |
+
"loss": 1.698,
|
| 117959 |
+
"step": 16754
|
| 117960 |
+
},
|
| 117961 |
+
{
|
| 117962 |
+
"epoch": 1.8063985768961242,
|
| 117963 |
+
"grad_norm": 1.8918417692184448,
|
| 117964 |
+
"learning_rate": 6.858278038044344e-05,
|
| 117965 |
+
"loss": 2.3749,
|
| 117966 |
+
"step": 16755
|
| 117967 |
+
},
|
| 117968 |
+
{
|
| 117969 |
+
"epoch": 1.8065063877958063,
|
| 117970 |
+
"grad_norm": 1.5193601846694946,
|
| 117971 |
+
"learning_rate": 6.857205015733105e-05,
|
| 117972 |
+
"loss": 1.398,
|
| 117973 |
+
"step": 16756
|
| 117974 |
+
},
|
| 117975 |
+
{
|
| 117976 |
+
"epoch": 1.8066141986954882,
|
| 117977 |
+
"grad_norm": 1.6527878046035767,
|
| 117978 |
+
"learning_rate": 6.856132033571578e-05,
|
| 117979 |
+
"loss": 2.1377,
|
| 117980 |
+
"step": 16757
|
| 117981 |
+
},
|
| 117982 |
+
{
|
| 117983 |
+
"epoch": 1.8067220095951702,
|
| 117984 |
+
"grad_norm": 1.6129385232925415,
|
| 117985 |
+
"learning_rate": 6.855059091573472e-05,
|
| 117986 |
+
"loss": 1.954,
|
| 117987 |
+
"step": 16758
|
| 117988 |
+
},
|
| 117989 |
+
{
|
| 117990 |
+
"epoch": 1.806829820494852,
|
| 117991 |
+
"grad_norm": 1.9749447107315063,
|
| 117992 |
+
"learning_rate": 6.853986189752492e-05,
|
| 117993 |
+
"loss": 2.6386,
|
| 117994 |
+
"step": 16759
|
| 117995 |
+
},
|
| 117996 |
+
{
|
| 117997 |
+
"epoch": 1.806937631394534,
|
| 117998 |
+
"grad_norm": 1.6756690740585327,
|
| 117999 |
+
"learning_rate": 6.852913328122348e-05,
|
| 118000 |
+
"loss": 1.9199,
|
| 118001 |
+
"step": 16760
|
| 118002 |
+
},
|
| 118003 |
+
{
|
| 118004 |
+
"epoch": 1.807045442294216,
|
| 118005 |
+
"grad_norm": 1.8738325834274292,
|
| 118006 |
+
"learning_rate": 6.851840506696738e-05,
|
| 118007 |
+
"loss": 2.1375,
|
| 118008 |
+
"step": 16761
|
| 118009 |
+
},
|
| 118010 |
+
{
|
| 118011 |
+
"epoch": 1.8071532531938979,
|
| 118012 |
+
"grad_norm": 1.7268435955047607,
|
| 118013 |
+
"learning_rate": 6.850767725489378e-05,
|
| 118014 |
+
"loss": 2.0061,
|
| 118015 |
+
"step": 16762
|
| 118016 |
+
},
|
| 118017 |
+
{
|
| 118018 |
+
"epoch": 1.8072610640935798,
|
| 118019 |
+
"grad_norm": 1.6114068031311035,
|
| 118020 |
+
"learning_rate": 6.849694984513968e-05,
|
| 118021 |
+
"loss": 1.8376,
|
| 118022 |
+
"step": 16763
|
| 118023 |
+
},
|
| 118024 |
+
{
|
| 118025 |
+
"epoch": 1.8073688749932617,
|
| 118026 |
+
"grad_norm": 1.6898713111877441,
|
| 118027 |
+
"learning_rate": 6.848622283784209e-05,
|
| 118028 |
+
"loss": 1.4137,
|
| 118029 |
+
"step": 16764
|
| 118030 |
+
},
|
| 118031 |
+
{
|
| 118032 |
+
"epoch": 1.8074766858929436,
|
| 118033 |
+
"grad_norm": 1.7274880409240723,
|
| 118034 |
+
"learning_rate": 6.847549623313812e-05,
|
| 118035 |
+
"loss": 1.445,
|
| 118036 |
+
"step": 16765
|
| 118037 |
+
},
|
| 118038 |
+
{
|
| 118039 |
+
"epoch": 1.8075844967926258,
|
| 118040 |
+
"grad_norm": 1.5099462270736694,
|
| 118041 |
+
"learning_rate": 6.846477003116474e-05,
|
| 118042 |
+
"loss": 1.957,
|
| 118043 |
+
"step": 16766
|
| 118044 |
+
},
|
| 118045 |
+
{
|
| 118046 |
+
"epoch": 1.8076923076923077,
|
| 118047 |
+
"grad_norm": 1.7615904808044434,
|
| 118048 |
+
"learning_rate": 6.845404423205902e-05,
|
| 118049 |
+
"loss": 2.3326,
|
| 118050 |
+
"step": 16767
|
| 118051 |
+
},
|
| 118052 |
+
{
|
| 118053 |
+
"epoch": 1.8078001185919896,
|
| 118054 |
+
"grad_norm": 1.7697466611862183,
|
| 118055 |
+
"learning_rate": 6.844331883595794e-05,
|
| 118056 |
+
"loss": 1.9051,
|
| 118057 |
+
"step": 16768
|
| 118058 |
+
},
|
| 118059 |
+
{
|
| 118060 |
+
"epoch": 1.8079079294916716,
|
| 118061 |
+
"grad_norm": 1.5290441513061523,
|
| 118062 |
+
"learning_rate": 6.843259384299858e-05,
|
| 118063 |
+
"loss": 1.8059,
|
| 118064 |
+
"step": 16769
|
| 118065 |
+
},
|
| 118066 |
+
{
|
| 118067 |
+
"epoch": 1.8080157403913537,
|
| 118068 |
+
"grad_norm": 1.6439237594604492,
|
| 118069 |
+
"learning_rate": 6.842186925331793e-05,
|
| 118070 |
+
"loss": 2.2458,
|
| 118071 |
+
"step": 16770
|
| 118072 |
+
},
|
| 118073 |
+
{
|
| 118074 |
+
"epoch": 1.8081235512910356,
|
| 118075 |
+
"grad_norm": 1.8173085451126099,
|
| 118076 |
+
"learning_rate": 6.841114506705295e-05,
|
| 118077 |
+
"loss": 1.9412,
|
| 118078 |
+
"step": 16771
|
| 118079 |
+
},
|
| 118080 |
+
{
|
| 118081 |
+
"epoch": 1.8082313621907176,
|
| 118082 |
+
"grad_norm": 1.658616304397583,
|
| 118083 |
+
"learning_rate": 6.840042128434071e-05,
|
| 118084 |
+
"loss": 2.0217,
|
| 118085 |
+
"step": 16772
|
| 118086 |
+
},
|
| 118087 |
+
{
|
| 118088 |
+
"epoch": 1.8083391730903995,
|
| 118089 |
+
"grad_norm": 1.5304282903671265,
|
| 118090 |
+
"learning_rate": 6.838969790531817e-05,
|
| 118091 |
+
"loss": 1.4336,
|
| 118092 |
+
"step": 16773
|
| 118093 |
+
},
|
| 118094 |
+
{
|
| 118095 |
+
"epoch": 1.8084469839900814,
|
| 118096 |
+
"grad_norm": 1.6302284002304077,
|
| 118097 |
+
"learning_rate": 6.837897493012232e-05,
|
| 118098 |
+
"loss": 1.8342,
|
| 118099 |
+
"step": 16774
|
| 118100 |
+
},
|
| 118101 |
+
{
|
| 118102 |
+
"epoch": 1.8085547948897633,
|
| 118103 |
+
"grad_norm": 1.699869990348816,
|
| 118104 |
+
"learning_rate": 6.83682523588902e-05,
|
| 118105 |
+
"loss": 1.7419,
|
| 118106 |
+
"step": 16775
|
| 118107 |
+
},
|
| 118108 |
+
{
|
| 118109 |
+
"epoch": 1.8086626057894453,
|
| 118110 |
+
"grad_norm": 1.3302178382873535,
|
| 118111 |
+
"learning_rate": 6.835753019175875e-05,
|
| 118112 |
+
"loss": 1.4837,
|
| 118113 |
+
"step": 16776
|
| 118114 |
+
},
|
| 118115 |
+
{
|
| 118116 |
+
"epoch": 1.8087704166891272,
|
| 118117 |
+
"grad_norm": 1.7208220958709717,
|
| 118118 |
+
"learning_rate": 6.834680842886496e-05,
|
| 118119 |
+
"loss": 1.9933,
|
| 118120 |
+
"step": 16777
|
| 118121 |
+
},
|
| 118122 |
+
{
|
| 118123 |
+
"epoch": 1.8088782275888091,
|
| 118124 |
+
"grad_norm": 1.696081280708313,
|
| 118125 |
+
"learning_rate": 6.833608707034573e-05,
|
| 118126 |
+
"loss": 1.7618,
|
| 118127 |
+
"step": 16778
|
| 118128 |
+
},
|
| 118129 |
+
{
|
| 118130 |
+
"epoch": 1.808986038488491,
|
| 118131 |
+
"grad_norm": 1.6352111101150513,
|
| 118132 |
+
"learning_rate": 6.832536611633817e-05,
|
| 118133 |
+
"loss": 1.9259,
|
| 118134 |
+
"step": 16779
|
| 118135 |
+
},
|
| 118136 |
+
{
|
| 118137 |
+
"epoch": 1.8090938493881732,
|
| 118138 |
+
"grad_norm": 1.3767921924591064,
|
| 118139 |
+
"learning_rate": 6.83146455669791e-05,
|
| 118140 |
+
"loss": 1.1781,
|
| 118141 |
+
"step": 16780
|
| 118142 |
+
},
|
| 118143 |
+
{
|
| 118144 |
+
"epoch": 1.8092016602878551,
|
| 118145 |
+
"grad_norm": 1.8465744256973267,
|
| 118146 |
+
"learning_rate": 6.830392542240556e-05,
|
| 118147 |
+
"loss": 2.3951,
|
| 118148 |
+
"step": 16781
|
| 118149 |
+
},
|
| 118150 |
+
{
|
| 118151 |
+
"epoch": 1.809309471187537,
|
| 118152 |
+
"grad_norm": 1.5014816522598267,
|
| 118153 |
+
"learning_rate": 6.82932056827545e-05,
|
| 118154 |
+
"loss": 2.0813,
|
| 118155 |
+
"step": 16782
|
| 118156 |
+
},
|
| 118157 |
+
{
|
| 118158 |
+
"epoch": 1.809417282087219,
|
| 118159 |
+
"grad_norm": 1.6501123905181885,
|
| 118160 |
+
"learning_rate": 6.828248634816285e-05,
|
| 118161 |
+
"loss": 1.7961,
|
| 118162 |
+
"step": 16783
|
| 118163 |
+
},
|
| 118164 |
+
{
|
| 118165 |
+
"epoch": 1.8095250929869011,
|
| 118166 |
+
"grad_norm": 1.7680307626724243,
|
| 118167 |
+
"learning_rate": 6.82717674187675e-05,
|
| 118168 |
+
"loss": 2.0478,
|
| 118169 |
+
"step": 16784
|
| 118170 |
+
},
|
| 118171 |
+
{
|
| 118172 |
+
"epoch": 1.809632903886583,
|
| 118173 |
+
"grad_norm": 1.593162178993225,
|
| 118174 |
+
"learning_rate": 6.826104889470551e-05,
|
| 118175 |
+
"loss": 2.287,
|
| 118176 |
+
"step": 16785
|
| 118177 |
+
},
|
| 118178 |
+
{
|
| 118179 |
+
"epoch": 1.809740714786265,
|
| 118180 |
+
"grad_norm": 1.5001581907272339,
|
| 118181 |
+
"learning_rate": 6.825033077611369e-05,
|
| 118182 |
+
"loss": 1.7892,
|
| 118183 |
+
"step": 16786
|
| 118184 |
+
},
|
| 118185 |
+
{
|
| 118186 |
+
"epoch": 1.809848525685947,
|
| 118187 |
+
"grad_norm": 1.5573759078979492,
|
| 118188 |
+
"learning_rate": 6.8239613063129e-05,
|
| 118189 |
+
"loss": 1.9807,
|
| 118190 |
+
"step": 16787
|
| 118191 |
+
},
|
| 118192 |
+
{
|
| 118193 |
+
"epoch": 1.8099563365856288,
|
| 118194 |
+
"grad_norm": 1.467556357383728,
|
| 118195 |
+
"learning_rate": 6.822889575588838e-05,
|
| 118196 |
+
"loss": 1.822,
|
| 118197 |
+
"step": 16788
|
| 118198 |
+
},
|
| 118199 |
+
{
|
| 118200 |
+
"epoch": 1.8100641474853107,
|
| 118201 |
+
"grad_norm": 1.5728412866592407,
|
| 118202 |
+
"learning_rate": 6.821817885452877e-05,
|
| 118203 |
+
"loss": 1.9902,
|
| 118204 |
+
"step": 16789
|
| 118205 |
+
},
|
| 118206 |
+
{
|
| 118207 |
+
"epoch": 1.8101719583849927,
|
| 118208 |
+
"grad_norm": 1.6717913150787354,
|
| 118209 |
+
"learning_rate": 6.820746235918705e-05,
|
| 118210 |
+
"loss": 2.1965,
|
| 118211 |
+
"step": 16790
|
| 118212 |
+
},
|
| 118213 |
+
{
|
| 118214 |
+
"epoch": 1.8102797692846746,
|
| 118215 |
+
"grad_norm": 1.604662299156189,
|
| 118216 |
+
"learning_rate": 6.819674627000006e-05,
|
| 118217 |
+
"loss": 1.7346,
|
| 118218 |
+
"step": 16791
|
| 118219 |
+
},
|
| 118220 |
+
{
|
| 118221 |
+
"epoch": 1.8103875801843565,
|
| 118222 |
+
"grad_norm": 1.5487961769104004,
|
| 118223 |
+
"learning_rate": 6.818603058710482e-05,
|
| 118224 |
+
"loss": 1.6207,
|
| 118225 |
+
"step": 16792
|
| 118226 |
+
},
|
| 118227 |
+
{
|
| 118228 |
+
"epoch": 1.8104953910840385,
|
| 118229 |
+
"grad_norm": 1.6253288984298706,
|
| 118230 |
+
"learning_rate": 6.817531531063814e-05,
|
| 118231 |
+
"loss": 2.0941,
|
| 118232 |
+
"step": 16793
|
| 118233 |
+
},
|
| 118234 |
+
{
|
| 118235 |
+
"epoch": 1.8106032019837206,
|
| 118236 |
+
"grad_norm": 1.8032333850860596,
|
| 118237 |
+
"learning_rate": 6.816460044073693e-05,
|
| 118238 |
+
"loss": 2.5082,
|
| 118239 |
+
"step": 16794
|
| 118240 |
+
},
|
| 118241 |
+
{
|
| 118242 |
+
"epoch": 1.8107110128834025,
|
| 118243 |
+
"grad_norm": 1.5565704107284546,
|
| 118244 |
+
"learning_rate": 6.81538859775381e-05,
|
| 118245 |
+
"loss": 1.9145,
|
| 118246 |
+
"step": 16795
|
| 118247 |
+
},
|
| 118248 |
+
{
|
| 118249 |
+
"epoch": 1.8108188237830845,
|
| 118250 |
+
"grad_norm": 1.5818214416503906,
|
| 118251 |
+
"learning_rate": 6.814317192117852e-05,
|
| 118252 |
+
"loss": 2.0734,
|
| 118253 |
+
"step": 16796
|
| 118254 |
+
},
|
| 118255 |
+
{
|
| 118256 |
+
"epoch": 1.8109266346827664,
|
| 118257 |
+
"grad_norm": 1.6937018632888794,
|
| 118258 |
+
"learning_rate": 6.813245827179501e-05,
|
| 118259 |
+
"loss": 1.7444,
|
| 118260 |
+
"step": 16797
|
| 118261 |
+
},
|
| 118262 |
+
{
|
| 118263 |
+
"epoch": 1.8110344455824485,
|
| 118264 |
+
"grad_norm": 1.534384846687317,
|
| 118265 |
+
"learning_rate": 6.812174502952453e-05,
|
| 118266 |
+
"loss": 1.8542,
|
| 118267 |
+
"step": 16798
|
| 118268 |
+
},
|
| 118269 |
+
{
|
| 118270 |
+
"epoch": 1.8111422564821305,
|
| 118271 |
+
"grad_norm": 1.6197118759155273,
|
| 118272 |
+
"learning_rate": 6.81110321945039e-05,
|
| 118273 |
+
"loss": 1.9892,
|
| 118274 |
+
"step": 16799
|
| 118275 |
+
},
|
| 118276 |
+
{
|
| 118277 |
+
"epoch": 1.8112500673818124,
|
| 118278 |
+
"grad_norm": 1.4124948978424072,
|
| 118279 |
+
"learning_rate": 6.810031976686994e-05,
|
| 118280 |
+
"loss": 1.4515,
|
| 118281 |
+
"step": 16800
|
| 118282 |
+
},
|
| 118283 |
+
{
|
| 118284 |
+
"epoch": 1.8112500673818124,
|
| 118285 |
+
"eval_loss": 1.9967706203460693,
|
| 118286 |
+
"eval_runtime": 16.5872,
|
| 118287 |
+
"eval_samples_per_second": 33.761,
|
| 118288 |
+
"eval_steps_per_second": 11.274,
|
| 118289 |
+
"step": 16800
|
| 118290 |
}
|
| 118291 |
],
|
| 118292 |
"logging_steps": 1,
|
|
|
|
| 118315 |
"attributes": {}
|
| 118316 |
}
|
| 118317 |
},
|
| 118318 |
+
"total_flos": 2.400554180766597e+17,
|
| 118319 |
"train_batch_size": 3,
|
| 118320 |
"trial_name": null,
|
| 118321 |
"trial_params": null
|