Training in progress, step 1400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 319876032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f824383cc907e0192634cef0a3c1d574717c7ebae1cc469699289b37f602bde
|
| 3 |
size 319876032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 244153300
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49180115137395021a0e586c502dbdfbab294ff53e337281e9477e0dbaa17f6f
|
| 3 |
size 244153300
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bb2e7995a85e3c3e764a36f042343b83a62dc31612ca9c1daf7fb0fbe3140a5
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55817aaa7ec585cd6ae6f5f8c8c1b0cda446d666cfcbb0a5597557162064080f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.45611435174942017,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-800",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -8463,6 +8463,1414 @@
|
|
| 8463 |
"eval_samples_per_second": 5.482,
|
| 8464 |
"eval_steps_per_second": 5.482,
|
| 8465 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8466 |
}
|
| 8467 |
],
|
| 8468 |
"logging_steps": 1,
|
|
@@ -8477,7 +9885,7 @@
|
|
| 8477 |
"early_stopping_threshold": 0.0
|
| 8478 |
},
|
| 8479 |
"attributes": {
|
| 8480 |
-
"early_stopping_patience_counter":
|
| 8481 |
}
|
| 8482 |
},
|
| 8483 |
"TrainerControl": {
|
|
@@ -8491,7 +9899,7 @@
|
|
| 8491 |
"attributes": {}
|
| 8492 |
}
|
| 8493 |
},
|
| 8494 |
-
"total_flos":
|
| 8495 |
"train_batch_size": 1,
|
| 8496 |
"trial_name": null,
|
| 8497 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.45611435174942017,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-800",
|
| 4 |
+
"epoch": 0.10054943081840055,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 1400,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 8463 |
"eval_samples_per_second": 5.482,
|
| 8464 |
"eval_steps_per_second": 5.482,
|
| 8465 |
"step": 1200
|
| 8466 |
+
},
|
| 8467 |
+
{
|
| 8468 |
+
"epoch": 0.08625704743778503,
|
| 8469 |
+
"grad_norm": 60.90087127685547,
|
| 8470 |
+
"learning_rate": 0.0002,
|
| 8471 |
+
"loss": 0.3922,
|
| 8472 |
+
"step": 1201
|
| 8473 |
+
},
|
| 8474 |
+
{
|
| 8475 |
+
"epoch": 0.08632886845979819,
|
| 8476 |
+
"grad_norm": 117.42794799804688,
|
| 8477 |
+
"learning_rate": 0.0002,
|
| 8478 |
+
"loss": 0.4031,
|
| 8479 |
+
"step": 1202
|
| 8480 |
+
},
|
| 8481 |
+
{
|
| 8482 |
+
"epoch": 0.08640068948181133,
|
| 8483 |
+
"grad_norm": 53.196128845214844,
|
| 8484 |
+
"learning_rate": 0.0002,
|
| 8485 |
+
"loss": 0.3816,
|
| 8486 |
+
"step": 1203
|
| 8487 |
+
},
|
| 8488 |
+
{
|
| 8489 |
+
"epoch": 0.08647251050382447,
|
| 8490 |
+
"grad_norm": 48.9370002746582,
|
| 8491 |
+
"learning_rate": 0.0002,
|
| 8492 |
+
"loss": 0.1292,
|
| 8493 |
+
"step": 1204
|
| 8494 |
+
},
|
| 8495 |
+
{
|
| 8496 |
+
"epoch": 0.08654433152583761,
|
| 8497 |
+
"grad_norm": 14.047211647033691,
|
| 8498 |
+
"learning_rate": 0.0002,
|
| 8499 |
+
"loss": 0.1024,
|
| 8500 |
+
"step": 1205
|
| 8501 |
+
},
|
| 8502 |
+
{
|
| 8503 |
+
"epoch": 0.08661615254785075,
|
| 8504 |
+
"grad_norm": 25.96181297302246,
|
| 8505 |
+
"learning_rate": 0.0002,
|
| 8506 |
+
"loss": 0.0395,
|
| 8507 |
+
"step": 1206
|
| 8508 |
+
},
|
| 8509 |
+
{
|
| 8510 |
+
"epoch": 0.0866879735698639,
|
| 8511 |
+
"grad_norm": 28.897790908813477,
|
| 8512 |
+
"learning_rate": 0.0002,
|
| 8513 |
+
"loss": 0.0703,
|
| 8514 |
+
"step": 1207
|
| 8515 |
+
},
|
| 8516 |
+
{
|
| 8517 |
+
"epoch": 0.08675979459187705,
|
| 8518 |
+
"grad_norm": 266.0474853515625,
|
| 8519 |
+
"learning_rate": 0.0002,
|
| 8520 |
+
"loss": 0.031,
|
| 8521 |
+
"step": 1208
|
| 8522 |
+
},
|
| 8523 |
+
{
|
| 8524 |
+
"epoch": 0.08683161561389019,
|
| 8525 |
+
"grad_norm": 41.11625671386719,
|
| 8526 |
+
"learning_rate": 0.0002,
|
| 8527 |
+
"loss": 0.027,
|
| 8528 |
+
"step": 1209
|
| 8529 |
+
},
|
| 8530 |
+
{
|
| 8531 |
+
"epoch": 0.08690343663590333,
|
| 8532 |
+
"grad_norm": 16.058467864990234,
|
| 8533 |
+
"learning_rate": 0.0002,
|
| 8534 |
+
"loss": 0.0179,
|
| 8535 |
+
"step": 1210
|
| 8536 |
+
},
|
| 8537 |
+
{
|
| 8538 |
+
"epoch": 0.08697525765791647,
|
| 8539 |
+
"grad_norm": 24.90563201904297,
|
| 8540 |
+
"learning_rate": 0.0002,
|
| 8541 |
+
"loss": 0.0119,
|
| 8542 |
+
"step": 1211
|
| 8543 |
+
},
|
| 8544 |
+
{
|
| 8545 |
+
"epoch": 0.08704707867992961,
|
| 8546 |
+
"grad_norm": 10.05605411529541,
|
| 8547 |
+
"learning_rate": 0.0002,
|
| 8548 |
+
"loss": 0.0186,
|
| 8549 |
+
"step": 1212
|
| 8550 |
+
},
|
| 8551 |
+
{
|
| 8552 |
+
"epoch": 0.08711889970194275,
|
| 8553 |
+
"grad_norm": 12.948044776916504,
|
| 8554 |
+
"learning_rate": 0.0002,
|
| 8555 |
+
"loss": 0.0038,
|
| 8556 |
+
"step": 1213
|
| 8557 |
+
},
|
| 8558 |
+
{
|
| 8559 |
+
"epoch": 0.08719072072395591,
|
| 8560 |
+
"grad_norm": 10.987204551696777,
|
| 8561 |
+
"learning_rate": 0.0002,
|
| 8562 |
+
"loss": 0.019,
|
| 8563 |
+
"step": 1214
|
| 8564 |
+
},
|
| 8565 |
+
{
|
| 8566 |
+
"epoch": 0.08726254174596905,
|
| 8567 |
+
"grad_norm": 6.216747283935547,
|
| 8568 |
+
"learning_rate": 0.0002,
|
| 8569 |
+
"loss": 0.0188,
|
| 8570 |
+
"step": 1215
|
| 8571 |
+
},
|
| 8572 |
+
{
|
| 8573 |
+
"epoch": 0.08733436276798219,
|
| 8574 |
+
"grad_norm": 29.069843292236328,
|
| 8575 |
+
"learning_rate": 0.0002,
|
| 8576 |
+
"loss": 0.145,
|
| 8577 |
+
"step": 1216
|
| 8578 |
+
},
|
| 8579 |
+
{
|
| 8580 |
+
"epoch": 0.08740618378999533,
|
| 8581 |
+
"grad_norm": 184.40255737304688,
|
| 8582 |
+
"learning_rate": 0.0002,
|
| 8583 |
+
"loss": 0.0811,
|
| 8584 |
+
"step": 1217
|
| 8585 |
+
},
|
| 8586 |
+
{
|
| 8587 |
+
"epoch": 0.08747800481200847,
|
| 8588 |
+
"grad_norm": 598.80322265625,
|
| 8589 |
+
"learning_rate": 0.0002,
|
| 8590 |
+
"loss": 0.0715,
|
| 8591 |
+
"step": 1218
|
| 8592 |
+
},
|
| 8593 |
+
{
|
| 8594 |
+
"epoch": 0.08754982583402161,
|
| 8595 |
+
"grad_norm": 265.7961730957031,
|
| 8596 |
+
"learning_rate": 0.0002,
|
| 8597 |
+
"loss": 0.1082,
|
| 8598 |
+
"step": 1219
|
| 8599 |
+
},
|
| 8600 |
+
{
|
| 8601 |
+
"epoch": 0.08762164685603475,
|
| 8602 |
+
"grad_norm": 1777.3001708984375,
|
| 8603 |
+
"learning_rate": 0.0002,
|
| 8604 |
+
"loss": 0.2009,
|
| 8605 |
+
"step": 1220
|
| 8606 |
+
},
|
| 8607 |
+
{
|
| 8608 |
+
"epoch": 0.08769346787804791,
|
| 8609 |
+
"grad_norm": 24097.189453125,
|
| 8610 |
+
"learning_rate": 0.0002,
|
| 8611 |
+
"loss": 0.4767,
|
| 8612 |
+
"step": 1221
|
| 8613 |
+
},
|
| 8614 |
+
{
|
| 8615 |
+
"epoch": 0.08776528890006105,
|
| 8616 |
+
"grad_norm": 11781.5439453125,
|
| 8617 |
+
"learning_rate": 0.0002,
|
| 8618 |
+
"loss": 0.6679,
|
| 8619 |
+
"step": 1222
|
| 8620 |
+
},
|
| 8621 |
+
{
|
| 8622 |
+
"epoch": 0.08783710992207419,
|
| 8623 |
+
"grad_norm": 7429.06396484375,
|
| 8624 |
+
"learning_rate": 0.0002,
|
| 8625 |
+
"loss": 1.2877,
|
| 8626 |
+
"step": 1223
|
| 8627 |
+
},
|
| 8628 |
+
{
|
| 8629 |
+
"epoch": 0.08790893094408733,
|
| 8630 |
+
"grad_norm": 11500.3623046875,
|
| 8631 |
+
"learning_rate": 0.0002,
|
| 8632 |
+
"loss": 1.7367,
|
| 8633 |
+
"step": 1224
|
| 8634 |
+
},
|
| 8635 |
+
{
|
| 8636 |
+
"epoch": 0.08798075196610047,
|
| 8637 |
+
"grad_norm": 3542.4248046875,
|
| 8638 |
+
"learning_rate": 0.0002,
|
| 8639 |
+
"loss": 2.6812,
|
| 8640 |
+
"step": 1225
|
| 8641 |
+
},
|
| 8642 |
+
{
|
| 8643 |
+
"epoch": 0.08805257298811361,
|
| 8644 |
+
"grad_norm": 19836.248046875,
|
| 8645 |
+
"learning_rate": 0.0002,
|
| 8646 |
+
"loss": 2.6966,
|
| 8647 |
+
"step": 1226
|
| 8648 |
+
},
|
| 8649 |
+
{
|
| 8650 |
+
"epoch": 0.08812439401012677,
|
| 8651 |
+
"grad_norm": 320707.65625,
|
| 8652 |
+
"learning_rate": 0.0002,
|
| 8653 |
+
"loss": 2.7375,
|
| 8654 |
+
"step": 1227
|
| 8655 |
+
},
|
| 8656 |
+
{
|
| 8657 |
+
"epoch": 0.08819621503213991,
|
| 8658 |
+
"grad_norm": 11478.837890625,
|
| 8659 |
+
"learning_rate": 0.0002,
|
| 8660 |
+
"loss": 2.2301,
|
| 8661 |
+
"step": 1228
|
| 8662 |
+
},
|
| 8663 |
+
{
|
| 8664 |
+
"epoch": 0.08826803605415305,
|
| 8665 |
+
"grad_norm": 3958.811279296875,
|
| 8666 |
+
"learning_rate": 0.0002,
|
| 8667 |
+
"loss": 2.5798,
|
| 8668 |
+
"step": 1229
|
| 8669 |
+
},
|
| 8670 |
+
{
|
| 8671 |
+
"epoch": 0.08833985707616619,
|
| 8672 |
+
"grad_norm": 5358.56005859375,
|
| 8673 |
+
"learning_rate": 0.0002,
|
| 8674 |
+
"loss": 3.0264,
|
| 8675 |
+
"step": 1230
|
| 8676 |
+
},
|
| 8677 |
+
{
|
| 8678 |
+
"epoch": 0.08841167809817933,
|
| 8679 |
+
"grad_norm": 22702.763671875,
|
| 8680 |
+
"learning_rate": 0.0002,
|
| 8681 |
+
"loss": 4.2674,
|
| 8682 |
+
"step": 1231
|
| 8683 |
+
},
|
| 8684 |
+
{
|
| 8685 |
+
"epoch": 0.08848349912019247,
|
| 8686 |
+
"grad_norm": 431.3515625,
|
| 8687 |
+
"learning_rate": 0.0002,
|
| 8688 |
+
"loss": 4.7265,
|
| 8689 |
+
"step": 1232
|
| 8690 |
+
},
|
| 8691 |
+
{
|
| 8692 |
+
"epoch": 0.08855532014220563,
|
| 8693 |
+
"grad_norm": 1031.437255859375,
|
| 8694 |
+
"learning_rate": 0.0002,
|
| 8695 |
+
"loss": 3.4126,
|
| 8696 |
+
"step": 1233
|
| 8697 |
+
},
|
| 8698 |
+
{
|
| 8699 |
+
"epoch": 0.08862714116421877,
|
| 8700 |
+
"grad_norm": 1139.3426513671875,
|
| 8701 |
+
"learning_rate": 0.0002,
|
| 8702 |
+
"loss": 3.0971,
|
| 8703 |
+
"step": 1234
|
| 8704 |
+
},
|
| 8705 |
+
{
|
| 8706 |
+
"epoch": 0.08869896218623191,
|
| 8707 |
+
"grad_norm": 5480.80322265625,
|
| 8708 |
+
"learning_rate": 0.0002,
|
| 8709 |
+
"loss": 2.6674,
|
| 8710 |
+
"step": 1235
|
| 8711 |
+
},
|
| 8712 |
+
{
|
| 8713 |
+
"epoch": 0.08877078320824505,
|
| 8714 |
+
"grad_norm": 337.08563232421875,
|
| 8715 |
+
"learning_rate": 0.0002,
|
| 8716 |
+
"loss": 2.8321,
|
| 8717 |
+
"step": 1236
|
| 8718 |
+
},
|
| 8719 |
+
{
|
| 8720 |
+
"epoch": 0.0888426042302582,
|
| 8721 |
+
"grad_norm": 1114.62646484375,
|
| 8722 |
+
"learning_rate": 0.0002,
|
| 8723 |
+
"loss": 2.06,
|
| 8724 |
+
"step": 1237
|
| 8725 |
+
},
|
| 8726 |
+
{
|
| 8727 |
+
"epoch": 0.08891442525227133,
|
| 8728 |
+
"grad_norm": 301.4099426269531,
|
| 8729 |
+
"learning_rate": 0.0002,
|
| 8730 |
+
"loss": 1.8179,
|
| 8731 |
+
"step": 1238
|
| 8732 |
+
},
|
| 8733 |
+
{
|
| 8734 |
+
"epoch": 0.08898624627428449,
|
| 8735 |
+
"grad_norm": 99.02914428710938,
|
| 8736 |
+
"learning_rate": 0.0002,
|
| 8737 |
+
"loss": 2.1298,
|
| 8738 |
+
"step": 1239
|
| 8739 |
+
},
|
| 8740 |
+
{
|
| 8741 |
+
"epoch": 0.08905806729629763,
|
| 8742 |
+
"grad_norm": 97.86017608642578,
|
| 8743 |
+
"learning_rate": 0.0002,
|
| 8744 |
+
"loss": 0.7338,
|
| 8745 |
+
"step": 1240
|
| 8746 |
+
},
|
| 8747 |
+
{
|
| 8748 |
+
"epoch": 0.08912988831831077,
|
| 8749 |
+
"grad_norm": 172.6566925048828,
|
| 8750 |
+
"learning_rate": 0.0002,
|
| 8751 |
+
"loss": 0.8901,
|
| 8752 |
+
"step": 1241
|
| 8753 |
+
},
|
| 8754 |
+
{
|
| 8755 |
+
"epoch": 0.08920170934032391,
|
| 8756 |
+
"grad_norm": 365.0599365234375,
|
| 8757 |
+
"learning_rate": 0.0002,
|
| 8758 |
+
"loss": 0.7761,
|
| 8759 |
+
"step": 1242
|
| 8760 |
+
},
|
| 8761 |
+
{
|
| 8762 |
+
"epoch": 0.08927353036233705,
|
| 8763 |
+
"grad_norm": 538.0731201171875,
|
| 8764 |
+
"learning_rate": 0.0002,
|
| 8765 |
+
"loss": 0.7911,
|
| 8766 |
+
"step": 1243
|
| 8767 |
+
},
|
| 8768 |
+
{
|
| 8769 |
+
"epoch": 0.0893453513843502,
|
| 8770 |
+
"grad_norm": 1002.2313232421875,
|
| 8771 |
+
"learning_rate": 0.0002,
|
| 8772 |
+
"loss": 1.3762,
|
| 8773 |
+
"step": 1244
|
| 8774 |
+
},
|
| 8775 |
+
{
|
| 8776 |
+
"epoch": 0.08941717240636335,
|
| 8777 |
+
"grad_norm": 383.1561279296875,
|
| 8778 |
+
"learning_rate": 0.0002,
|
| 8779 |
+
"loss": 2.3502,
|
| 8780 |
+
"step": 1245
|
| 8781 |
+
},
|
| 8782 |
+
{
|
| 8783 |
+
"epoch": 0.08948899342837649,
|
| 8784 |
+
"grad_norm": 667.1840209960938,
|
| 8785 |
+
"learning_rate": 0.0002,
|
| 8786 |
+
"loss": 1.8587,
|
| 8787 |
+
"step": 1246
|
| 8788 |
+
},
|
| 8789 |
+
{
|
| 8790 |
+
"epoch": 0.08956081445038963,
|
| 8791 |
+
"grad_norm": 7134.595703125,
|
| 8792 |
+
"learning_rate": 0.0002,
|
| 8793 |
+
"loss": 1.8113,
|
| 8794 |
+
"step": 1247
|
| 8795 |
+
},
|
| 8796 |
+
{
|
| 8797 |
+
"epoch": 0.08963263547240277,
|
| 8798 |
+
"grad_norm": 918.25732421875,
|
| 8799 |
+
"learning_rate": 0.0002,
|
| 8800 |
+
"loss": 2.1741,
|
| 8801 |
+
"step": 1248
|
| 8802 |
+
},
|
| 8803 |
+
{
|
| 8804 |
+
"epoch": 0.08970445649441591,
|
| 8805 |
+
"grad_norm": 767.9859008789062,
|
| 8806 |
+
"learning_rate": 0.0002,
|
| 8807 |
+
"loss": 1.9515,
|
| 8808 |
+
"step": 1249
|
| 8809 |
+
},
|
| 8810 |
+
{
|
| 8811 |
+
"epoch": 0.08977627751642905,
|
| 8812 |
+
"grad_norm": 834.791748046875,
|
| 8813 |
+
"learning_rate": 0.0002,
|
| 8814 |
+
"loss": 1.3873,
|
| 8815 |
+
"step": 1250
|
| 8816 |
+
},
|
| 8817 |
+
{
|
| 8818 |
+
"epoch": 0.08984809853844221,
|
| 8819 |
+
"grad_norm": 1537.1246337890625,
|
| 8820 |
+
"learning_rate": 0.0002,
|
| 8821 |
+
"loss": 6.4346,
|
| 8822 |
+
"step": 1251
|
| 8823 |
+
},
|
| 8824 |
+
{
|
| 8825 |
+
"epoch": 0.08991991956045535,
|
| 8826 |
+
"grad_norm": 4255.82470703125,
|
| 8827 |
+
"learning_rate": 0.0002,
|
| 8828 |
+
"loss": 5.2203,
|
| 8829 |
+
"step": 1252
|
| 8830 |
+
},
|
| 8831 |
+
{
|
| 8832 |
+
"epoch": 0.08999174058246849,
|
| 8833 |
+
"grad_norm": 4223.40087890625,
|
| 8834 |
+
"learning_rate": 0.0002,
|
| 8835 |
+
"loss": 4.9269,
|
| 8836 |
+
"step": 1253
|
| 8837 |
+
},
|
| 8838 |
+
{
|
| 8839 |
+
"epoch": 0.09006356160448163,
|
| 8840 |
+
"grad_norm": 1608.9921875,
|
| 8841 |
+
"learning_rate": 0.0002,
|
| 8842 |
+
"loss": 4.7313,
|
| 8843 |
+
"step": 1254
|
| 8844 |
+
},
|
| 8845 |
+
{
|
| 8846 |
+
"epoch": 0.09013538262649477,
|
| 8847 |
+
"grad_norm": 3040.088134765625,
|
| 8848 |
+
"learning_rate": 0.0002,
|
| 8849 |
+
"loss": 4.7124,
|
| 8850 |
+
"step": 1255
|
| 8851 |
+
},
|
| 8852 |
+
{
|
| 8853 |
+
"epoch": 0.09020720364850791,
|
| 8854 |
+
"grad_norm": 2185.75537109375,
|
| 8855 |
+
"learning_rate": 0.0002,
|
| 8856 |
+
"loss": 4.8097,
|
| 8857 |
+
"step": 1256
|
| 8858 |
+
},
|
| 8859 |
+
{
|
| 8860 |
+
"epoch": 0.09027902467052105,
|
| 8861 |
+
"grad_norm": 4034.5,
|
| 8862 |
+
"learning_rate": 0.0002,
|
| 8863 |
+
"loss": 4.4307,
|
| 8864 |
+
"step": 1257
|
| 8865 |
+
},
|
| 8866 |
+
{
|
| 8867 |
+
"epoch": 0.09035084569253421,
|
| 8868 |
+
"grad_norm": 3907.183837890625,
|
| 8869 |
+
"learning_rate": 0.0002,
|
| 8870 |
+
"loss": 3.2381,
|
| 8871 |
+
"step": 1258
|
| 8872 |
+
},
|
| 8873 |
+
{
|
| 8874 |
+
"epoch": 0.09042266671454735,
|
| 8875 |
+
"grad_norm": 758.9627075195312,
|
| 8876 |
+
"learning_rate": 0.0002,
|
| 8877 |
+
"loss": 3.5754,
|
| 8878 |
+
"step": 1259
|
| 8879 |
+
},
|
| 8880 |
+
{
|
| 8881 |
+
"epoch": 0.09049448773656049,
|
| 8882 |
+
"grad_norm": 1572.59765625,
|
| 8883 |
+
"learning_rate": 0.0002,
|
| 8884 |
+
"loss": 2.0373,
|
| 8885 |
+
"step": 1260
|
| 8886 |
+
},
|
| 8887 |
+
{
|
| 8888 |
+
"epoch": 0.09056630875857363,
|
| 8889 |
+
"grad_norm": 3224.02001953125,
|
| 8890 |
+
"learning_rate": 0.0002,
|
| 8891 |
+
"loss": 1.1423,
|
| 8892 |
+
"step": 1261
|
| 8893 |
+
},
|
| 8894 |
+
{
|
| 8895 |
+
"epoch": 0.09063812978058677,
|
| 8896 |
+
"grad_norm": 6365.9375,
|
| 8897 |
+
"learning_rate": 0.0002,
|
| 8898 |
+
"loss": 1.1589,
|
| 8899 |
+
"step": 1262
|
| 8900 |
+
},
|
| 8901 |
+
{
|
| 8902 |
+
"epoch": 0.09070995080259991,
|
| 8903 |
+
"grad_norm": 5653.08349609375,
|
| 8904 |
+
"learning_rate": 0.0002,
|
| 8905 |
+
"loss": 0.8969,
|
| 8906 |
+
"step": 1263
|
| 8907 |
+
},
|
| 8908 |
+
{
|
| 8909 |
+
"epoch": 0.09078177182461307,
|
| 8910 |
+
"grad_norm": 2920.125732421875,
|
| 8911 |
+
"learning_rate": 0.0002,
|
| 8912 |
+
"loss": 1.081,
|
| 8913 |
+
"step": 1264
|
| 8914 |
+
},
|
| 8915 |
+
{
|
| 8916 |
+
"epoch": 0.09085359284662621,
|
| 8917 |
+
"grad_norm": 3593.2783203125,
|
| 8918 |
+
"learning_rate": 0.0002,
|
| 8919 |
+
"loss": 1.2207,
|
| 8920 |
+
"step": 1265
|
| 8921 |
+
},
|
| 8922 |
+
{
|
| 8923 |
+
"epoch": 0.09092541386863935,
|
| 8924 |
+
"grad_norm": 318.59197998046875,
|
| 8925 |
+
"learning_rate": 0.0002,
|
| 8926 |
+
"loss": 1.0823,
|
| 8927 |
+
"step": 1266
|
| 8928 |
+
},
|
| 8929 |
+
{
|
| 8930 |
+
"epoch": 0.09099723489065249,
|
| 8931 |
+
"grad_norm": 700.755126953125,
|
| 8932 |
+
"learning_rate": 0.0002,
|
| 8933 |
+
"loss": 0.9032,
|
| 8934 |
+
"step": 1267
|
| 8935 |
+
},
|
| 8936 |
+
{
|
| 8937 |
+
"epoch": 0.09106905591266563,
|
| 8938 |
+
"grad_norm": 191.01446533203125,
|
| 8939 |
+
"learning_rate": 0.0002,
|
| 8940 |
+
"loss": 1.1056,
|
| 8941 |
+
"step": 1268
|
| 8942 |
+
},
|
| 8943 |
+
{
|
| 8944 |
+
"epoch": 0.09114087693467877,
|
| 8945 |
+
"grad_norm": 129.18807983398438,
|
| 8946 |
+
"learning_rate": 0.0002,
|
| 8947 |
+
"loss": 0.7014,
|
| 8948 |
+
"step": 1269
|
| 8949 |
+
},
|
| 8950 |
+
{
|
| 8951 |
+
"epoch": 0.09121269795669193,
|
| 8952 |
+
"grad_norm": 36.7803955078125,
|
| 8953 |
+
"learning_rate": 0.0002,
|
| 8954 |
+
"loss": 0.3591,
|
| 8955 |
+
"step": 1270
|
| 8956 |
+
},
|
| 8957 |
+
{
|
| 8958 |
+
"epoch": 0.09128451897870507,
|
| 8959 |
+
"grad_norm": 37.67393493652344,
|
| 8960 |
+
"learning_rate": 0.0002,
|
| 8961 |
+
"loss": 0.1294,
|
| 8962 |
+
"step": 1271
|
| 8963 |
+
},
|
| 8964 |
+
{
|
| 8965 |
+
"epoch": 0.09135634000071821,
|
| 8966 |
+
"grad_norm": 19.421794891357422,
|
| 8967 |
+
"learning_rate": 0.0002,
|
| 8968 |
+
"loss": 0.055,
|
| 8969 |
+
"step": 1272
|
| 8970 |
+
},
|
| 8971 |
+
{
|
| 8972 |
+
"epoch": 0.09142816102273135,
|
| 8973 |
+
"grad_norm": 23.07987403869629,
|
| 8974 |
+
"learning_rate": 0.0002,
|
| 8975 |
+
"loss": 0.0426,
|
| 8976 |
+
"step": 1273
|
| 8977 |
+
},
|
| 8978 |
+
{
|
| 8979 |
+
"epoch": 0.0914999820447445,
|
| 8980 |
+
"grad_norm": 19.846647262573242,
|
| 8981 |
+
"learning_rate": 0.0002,
|
| 8982 |
+
"loss": 0.0477,
|
| 8983 |
+
"step": 1274
|
| 8984 |
+
},
|
| 8985 |
+
{
|
| 8986 |
+
"epoch": 0.09157180306675763,
|
| 8987 |
+
"grad_norm": 37.141204833984375,
|
| 8988 |
+
"learning_rate": 0.0002,
|
| 8989 |
+
"loss": 0.0525,
|
| 8990 |
+
"step": 1275
|
| 8991 |
+
},
|
| 8992 |
+
{
|
| 8993 |
+
"epoch": 0.09164362408877079,
|
| 8994 |
+
"grad_norm": 31.53236961364746,
|
| 8995 |
+
"learning_rate": 0.0002,
|
| 8996 |
+
"loss": 0.0205,
|
| 8997 |
+
"step": 1276
|
| 8998 |
+
},
|
| 8999 |
+
{
|
| 9000 |
+
"epoch": 0.09171544511078393,
|
| 9001 |
+
"grad_norm": 10.85024642944336,
|
| 9002 |
+
"learning_rate": 0.0002,
|
| 9003 |
+
"loss": 0.0162,
|
| 9004 |
+
"step": 1277
|
| 9005 |
+
},
|
| 9006 |
+
{
|
| 9007 |
+
"epoch": 0.09178726613279707,
|
| 9008 |
+
"grad_norm": 5.539823055267334,
|
| 9009 |
+
"learning_rate": 0.0002,
|
| 9010 |
+
"loss": 0.0112,
|
| 9011 |
+
"step": 1278
|
| 9012 |
+
},
|
| 9013 |
+
{
|
| 9014 |
+
"epoch": 0.09185908715481021,
|
| 9015 |
+
"grad_norm": 3.093498706817627,
|
| 9016 |
+
"learning_rate": 0.0002,
|
| 9017 |
+
"loss": 0.0176,
|
| 9018 |
+
"step": 1279
|
| 9019 |
+
},
|
| 9020 |
+
{
|
| 9021 |
+
"epoch": 0.09193090817682335,
|
| 9022 |
+
"grad_norm": 20.419414520263672,
|
| 9023 |
+
"learning_rate": 0.0002,
|
| 9024 |
+
"loss": 0.0247,
|
| 9025 |
+
"step": 1280
|
| 9026 |
+
},
|
| 9027 |
+
{
|
| 9028 |
+
"epoch": 0.0920027291988365,
|
| 9029 |
+
"grad_norm": 50.28502655029297,
|
| 9030 |
+
"learning_rate": 0.0002,
|
| 9031 |
+
"loss": 0.1217,
|
| 9032 |
+
"step": 1281
|
| 9033 |
+
},
|
| 9034 |
+
{
|
| 9035 |
+
"epoch": 0.09207455022084965,
|
| 9036 |
+
"grad_norm": 96.8090591430664,
|
| 9037 |
+
"learning_rate": 0.0002,
|
| 9038 |
+
"loss": 0.0981,
|
| 9039 |
+
"step": 1282
|
| 9040 |
+
},
|
| 9041 |
+
{
|
| 9042 |
+
"epoch": 0.09214637124286279,
|
| 9043 |
+
"grad_norm": 27.442201614379883,
|
| 9044 |
+
"learning_rate": 0.0002,
|
| 9045 |
+
"loss": 0.0502,
|
| 9046 |
+
"step": 1283
|
| 9047 |
+
},
|
| 9048 |
+
{
|
| 9049 |
+
"epoch": 0.09221819226487593,
|
| 9050 |
+
"grad_norm": 48.08268356323242,
|
| 9051 |
+
"learning_rate": 0.0002,
|
| 9052 |
+
"loss": 0.0733,
|
| 9053 |
+
"step": 1284
|
| 9054 |
+
},
|
| 9055 |
+
{
|
| 9056 |
+
"epoch": 0.09229001328688907,
|
| 9057 |
+
"grad_norm": 48.22146224975586,
|
| 9058 |
+
"learning_rate": 0.0002,
|
| 9059 |
+
"loss": 0.0866,
|
| 9060 |
+
"step": 1285
|
| 9061 |
+
},
|
| 9062 |
+
{
|
| 9063 |
+
"epoch": 0.09236183430890221,
|
| 9064 |
+
"grad_norm": 192.900390625,
|
| 9065 |
+
"learning_rate": 0.0002,
|
| 9066 |
+
"loss": 2.9683,
|
| 9067 |
+
"step": 1286
|
| 9068 |
+
},
|
| 9069 |
+
{
|
| 9070 |
+
"epoch": 0.09243365533091535,
|
| 9071 |
+
"grad_norm": 1478.4373779296875,
|
| 9072 |
+
"learning_rate": 0.0002,
|
| 9073 |
+
"loss": 2.0743,
|
| 9074 |
+
"step": 1287
|
| 9075 |
+
},
|
| 9076 |
+
{
|
| 9077 |
+
"epoch": 0.09250547635292851,
|
| 9078 |
+
"grad_norm": 257.5186462402344,
|
| 9079 |
+
"learning_rate": 0.0002,
|
| 9080 |
+
"loss": 1.8619,
|
| 9081 |
+
"step": 1288
|
| 9082 |
+
},
|
| 9083 |
+
{
|
| 9084 |
+
"epoch": 0.09257729737494165,
|
| 9085 |
+
"grad_norm": 202.7418212890625,
|
| 9086 |
+
"learning_rate": 0.0002,
|
| 9087 |
+
"loss": 0.9179,
|
| 9088 |
+
"step": 1289
|
| 9089 |
+
},
|
| 9090 |
+
{
|
| 9091 |
+
"epoch": 0.09264911839695479,
|
| 9092 |
+
"grad_norm": 144.3949737548828,
|
| 9093 |
+
"learning_rate": 0.0002,
|
| 9094 |
+
"loss": 0.7807,
|
| 9095 |
+
"step": 1290
|
| 9096 |
+
},
|
| 9097 |
+
{
|
| 9098 |
+
"epoch": 0.09272093941896793,
|
| 9099 |
+
"grad_norm": 76.2898941040039,
|
| 9100 |
+
"learning_rate": 0.0002,
|
| 9101 |
+
"loss": 0.5442,
|
| 9102 |
+
"step": 1291
|
| 9103 |
+
},
|
| 9104 |
+
{
|
| 9105 |
+
"epoch": 0.09279276044098107,
|
| 9106 |
+
"grad_norm": 50.71910858154297,
|
| 9107 |
+
"learning_rate": 0.0002,
|
| 9108 |
+
"loss": 0.267,
|
| 9109 |
+
"step": 1292
|
| 9110 |
+
},
|
| 9111 |
+
{
|
| 9112 |
+
"epoch": 0.09286458146299421,
|
| 9113 |
+
"grad_norm": 45.99614715576172,
|
| 9114 |
+
"learning_rate": 0.0002,
|
| 9115 |
+
"loss": 0.1436,
|
| 9116 |
+
"step": 1293
|
| 9117 |
+
},
|
| 9118 |
+
{
|
| 9119 |
+
"epoch": 0.09293640248500736,
|
| 9120 |
+
"grad_norm": 34.93791961669922,
|
| 9121 |
+
"learning_rate": 0.0002,
|
| 9122 |
+
"loss": 0.1981,
|
| 9123 |
+
"step": 1294
|
| 9124 |
+
},
|
| 9125 |
+
{
|
| 9126 |
+
"epoch": 0.09300822350702051,
|
| 9127 |
+
"grad_norm": 33.471378326416016,
|
| 9128 |
+
"learning_rate": 0.0002,
|
| 9129 |
+
"loss": 0.0687,
|
| 9130 |
+
"step": 1295
|
| 9131 |
+
},
|
| 9132 |
+
{
|
| 9133 |
+
"epoch": 0.09308004452903365,
|
| 9134 |
+
"grad_norm": 51.53607177734375,
|
| 9135 |
+
"learning_rate": 0.0002,
|
| 9136 |
+
"loss": 0.0972,
|
| 9137 |
+
"step": 1296
|
| 9138 |
+
},
|
| 9139 |
+
{
|
| 9140 |
+
"epoch": 0.09315186555104679,
|
| 9141 |
+
"grad_norm": 57.13620376586914,
|
| 9142 |
+
"learning_rate": 0.0002,
|
| 9143 |
+
"loss": 0.0317,
|
| 9144 |
+
"step": 1297
|
| 9145 |
+
},
|
| 9146 |
+
{
|
| 9147 |
+
"epoch": 0.09322368657305993,
|
| 9148 |
+
"grad_norm": 118.36432647705078,
|
| 9149 |
+
"learning_rate": 0.0002,
|
| 9150 |
+
"loss": 0.1188,
|
| 9151 |
+
"step": 1298
|
| 9152 |
+
},
|
| 9153 |
+
{
|
| 9154 |
+
"epoch": 0.09329550759507307,
|
| 9155 |
+
"grad_norm": 299.77923583984375,
|
| 9156 |
+
"learning_rate": 0.0002,
|
| 9157 |
+
"loss": 0.286,
|
| 9158 |
+
"step": 1299
|
| 9159 |
+
},
|
| 9160 |
+
{
|
| 9161 |
+
"epoch": 0.09336732861708621,
|
| 9162 |
+
"grad_norm": 929.7402954101562,
|
| 9163 |
+
"learning_rate": 0.0002,
|
| 9164 |
+
"loss": 0.6301,
|
| 9165 |
+
"step": 1300
|
| 9166 |
+
},
|
| 9167 |
+
{
|
| 9168 |
+
"epoch": 0.09343914963909937,
|
| 9169 |
+
"grad_norm": 1662.5167236328125,
|
| 9170 |
+
"learning_rate": 0.0002,
|
| 9171 |
+
"loss": 0.1967,
|
| 9172 |
+
"step": 1301
|
| 9173 |
+
},
|
| 9174 |
+
{
|
| 9175 |
+
"epoch": 0.09351097066111251,
|
| 9176 |
+
"grad_norm": 580.6841430664062,
|
| 9177 |
+
"learning_rate": 0.0002,
|
| 9178 |
+
"loss": 0.351,
|
| 9179 |
+
"step": 1302
|
| 9180 |
+
},
|
| 9181 |
+
{
|
| 9182 |
+
"epoch": 0.09358279168312565,
|
| 9183 |
+
"grad_norm": 929.946044921875,
|
| 9184 |
+
"learning_rate": 0.0002,
|
| 9185 |
+
"loss": 0.4387,
|
| 9186 |
+
"step": 1303
|
| 9187 |
+
},
|
| 9188 |
+
{
|
| 9189 |
+
"epoch": 0.09365461270513879,
|
| 9190 |
+
"grad_norm": 6147.51953125,
|
| 9191 |
+
"learning_rate": 0.0002,
|
| 9192 |
+
"loss": 0.3985,
|
| 9193 |
+
"step": 1304
|
| 9194 |
+
},
|
| 9195 |
+
{
|
| 9196 |
+
"epoch": 0.09372643372715193,
|
| 9197 |
+
"grad_norm": 1374.1129150390625,
|
| 9198 |
+
"learning_rate": 0.0002,
|
| 9199 |
+
"loss": 0.3492,
|
| 9200 |
+
"step": 1305
|
| 9201 |
+
},
|
| 9202 |
+
{
|
| 9203 |
+
"epoch": 0.09379825474916507,
|
| 9204 |
+
"grad_norm": 906.6214599609375,
|
| 9205 |
+
"learning_rate": 0.0002,
|
| 9206 |
+
"loss": 0.351,
|
| 9207 |
+
"step": 1306
|
| 9208 |
+
},
|
| 9209 |
+
{
|
| 9210 |
+
"epoch": 0.09387007577117823,
|
| 9211 |
+
"grad_norm": 2183.86279296875,
|
| 9212 |
+
"learning_rate": 0.0002,
|
| 9213 |
+
"loss": 0.3932,
|
| 9214 |
+
"step": 1307
|
| 9215 |
+
},
|
| 9216 |
+
{
|
| 9217 |
+
"epoch": 0.09394189679319137,
|
| 9218 |
+
"grad_norm": 1404.1693115234375,
|
| 9219 |
+
"learning_rate": 0.0002,
|
| 9220 |
+
"loss": 0.2698,
|
| 9221 |
+
"step": 1308
|
| 9222 |
+
},
|
| 9223 |
+
{
|
| 9224 |
+
"epoch": 0.09401371781520451,
|
| 9225 |
+
"grad_norm": 584.3571166992188,
|
| 9226 |
+
"learning_rate": 0.0002,
|
| 9227 |
+
"loss": 0.3345,
|
| 9228 |
+
"step": 1309
|
| 9229 |
+
},
|
| 9230 |
+
{
|
| 9231 |
+
"epoch": 0.09408553883721765,
|
| 9232 |
+
"grad_norm": 373.7394104003906,
|
| 9233 |
+
"learning_rate": 0.0002,
|
| 9234 |
+
"loss": 0.3348,
|
| 9235 |
+
"step": 1310
|
| 9236 |
+
},
|
| 9237 |
+
{
|
| 9238 |
+
"epoch": 0.0941573598592308,
|
| 9239 |
+
"grad_norm": 98.7094497680664,
|
| 9240 |
+
"learning_rate": 0.0002,
|
| 9241 |
+
"loss": 0.3218,
|
| 9242 |
+
"step": 1311
|
| 9243 |
+
},
|
| 9244 |
+
{
|
| 9245 |
+
"epoch": 0.09422918088124393,
|
| 9246 |
+
"grad_norm": 98.34455871582031,
|
| 9247 |
+
"learning_rate": 0.0002,
|
| 9248 |
+
"loss": 0.1556,
|
| 9249 |
+
"step": 1312
|
| 9250 |
+
},
|
| 9251 |
+
{
|
| 9252 |
+
"epoch": 0.09430100190325709,
|
| 9253 |
+
"grad_norm": 3735.58154296875,
|
| 9254 |
+
"learning_rate": 0.0002,
|
| 9255 |
+
"loss": 0.1075,
|
| 9256 |
+
"step": 1313
|
| 9257 |
+
},
|
| 9258 |
+
{
|
| 9259 |
+
"epoch": 0.09437282292527023,
|
| 9260 |
+
"grad_norm": 171.54306030273438,
|
| 9261 |
+
"learning_rate": 0.0002,
|
| 9262 |
+
"loss": 0.0918,
|
| 9263 |
+
"step": 1314
|
| 9264 |
+
},
|
| 9265 |
+
{
|
| 9266 |
+
"epoch": 0.09444464394728337,
|
| 9267 |
+
"grad_norm": 94.25514221191406,
|
| 9268 |
+
"learning_rate": 0.0002,
|
| 9269 |
+
"loss": 0.0693,
|
| 9270 |
+
"step": 1315
|
| 9271 |
+
},
|
| 9272 |
+
{
|
| 9273 |
+
"epoch": 0.09451646496929651,
|
| 9274 |
+
"grad_norm": 127.65548706054688,
|
| 9275 |
+
"learning_rate": 0.0002,
|
| 9276 |
+
"loss": 0.0784,
|
| 9277 |
+
"step": 1316
|
| 9278 |
+
},
|
| 9279 |
+
{
|
| 9280 |
+
"epoch": 0.09458828599130965,
|
| 9281 |
+
"grad_norm": 121.6147689819336,
|
| 9282 |
+
"learning_rate": 0.0002,
|
| 9283 |
+
"loss": 0.0542,
|
| 9284 |
+
"step": 1317
|
| 9285 |
+
},
|
| 9286 |
+
{
|
| 9287 |
+
"epoch": 0.0946601070133228,
|
| 9288 |
+
"grad_norm": 268.5514221191406,
|
| 9289 |
+
"learning_rate": 0.0002,
|
| 9290 |
+
"loss": 0.0718,
|
| 9291 |
+
"step": 1318
|
| 9292 |
+
},
|
| 9293 |
+
{
|
| 9294 |
+
"epoch": 0.09473192803533595,
|
| 9295 |
+
"grad_norm": 470.7925109863281,
|
| 9296 |
+
"learning_rate": 0.0002,
|
| 9297 |
+
"loss": 0.1093,
|
| 9298 |
+
"step": 1319
|
| 9299 |
+
},
|
| 9300 |
+
{
|
| 9301 |
+
"epoch": 0.09480374905734909,
|
| 9302 |
+
"grad_norm": 321.05712890625,
|
| 9303 |
+
"learning_rate": 0.0002,
|
| 9304 |
+
"loss": 0.111,
|
| 9305 |
+
"step": 1320
|
| 9306 |
+
},
|
| 9307 |
+
{
|
| 9308 |
+
"epoch": 0.09487557007936223,
|
| 9309 |
+
"grad_norm": 383.62432861328125,
|
| 9310 |
+
"learning_rate": 0.0002,
|
| 9311 |
+
"loss": 0.139,
|
| 9312 |
+
"step": 1321
|
| 9313 |
+
},
|
| 9314 |
+
{
|
| 9315 |
+
"epoch": 0.09494739110137537,
|
| 9316 |
+
"grad_norm": 223.98333740234375,
|
| 9317 |
+
"learning_rate": 0.0002,
|
| 9318 |
+
"loss": 0.1204,
|
| 9319 |
+
"step": 1322
|
| 9320 |
+
},
|
| 9321 |
+
{
|
| 9322 |
+
"epoch": 0.09501921212338851,
|
| 9323 |
+
"grad_norm": 273.00860595703125,
|
| 9324 |
+
"learning_rate": 0.0002,
|
| 9325 |
+
"loss": 0.0478,
|
| 9326 |
+
"step": 1323
|
| 9327 |
+
},
|
| 9328 |
+
{
|
| 9329 |
+
"epoch": 0.09509103314540165,
|
| 9330 |
+
"grad_norm": 407.0110778808594,
|
| 9331 |
+
"learning_rate": 0.0002,
|
| 9332 |
+
"loss": 0.0577,
|
| 9333 |
+
"step": 1324
|
| 9334 |
+
},
|
| 9335 |
+
{
|
| 9336 |
+
"epoch": 0.0951628541674148,
|
| 9337 |
+
"grad_norm": 363.35601806640625,
|
| 9338 |
+
"learning_rate": 0.0002,
|
| 9339 |
+
"loss": 0.0941,
|
| 9340 |
+
"step": 1325
|
| 9341 |
+
},
|
| 9342 |
+
{
|
| 9343 |
+
"epoch": 0.09523467518942795,
|
| 9344 |
+
"grad_norm": 942.7997436523438,
|
| 9345 |
+
"learning_rate": 0.0002,
|
| 9346 |
+
"loss": 0.2088,
|
| 9347 |
+
"step": 1326
|
| 9348 |
+
},
|
| 9349 |
+
{
|
| 9350 |
+
"epoch": 0.09530649621144109,
|
| 9351 |
+
"grad_norm": 150.12387084960938,
|
| 9352 |
+
"learning_rate": 0.0002,
|
| 9353 |
+
"loss": 0.2862,
|
| 9354 |
+
"step": 1327
|
| 9355 |
+
},
|
| 9356 |
+
{
|
| 9357 |
+
"epoch": 0.09537831723345423,
|
| 9358 |
+
"grad_norm": 182.57179260253906,
|
| 9359 |
+
"learning_rate": 0.0002,
|
| 9360 |
+
"loss": 0.1547,
|
| 9361 |
+
"step": 1328
|
| 9362 |
+
},
|
| 9363 |
+
{
|
| 9364 |
+
"epoch": 0.09545013825546737,
|
| 9365 |
+
"grad_norm": 103.62860107421875,
|
| 9366 |
+
"learning_rate": 0.0002,
|
| 9367 |
+
"loss": 0.0762,
|
| 9368 |
+
"step": 1329
|
| 9369 |
+
},
|
| 9370 |
+
{
|
| 9371 |
+
"epoch": 0.09552195927748051,
|
| 9372 |
+
"grad_norm": 1429.3486328125,
|
| 9373 |
+
"learning_rate": 0.0002,
|
| 9374 |
+
"loss": 0.07,
|
| 9375 |
+
"step": 1330
|
| 9376 |
+
},
|
| 9377 |
+
{
|
| 9378 |
+
"epoch": 0.09559378029949366,
|
| 9379 |
+
"grad_norm": 617.5159301757812,
|
| 9380 |
+
"learning_rate": 0.0002,
|
| 9381 |
+
"loss": 0.0941,
|
| 9382 |
+
"step": 1331
|
| 9383 |
+
},
|
| 9384 |
+
{
|
| 9385 |
+
"epoch": 0.09566560132150681,
|
| 9386 |
+
"grad_norm": 1300.7772216796875,
|
| 9387 |
+
"learning_rate": 0.0002,
|
| 9388 |
+
"loss": 0.1346,
|
| 9389 |
+
"step": 1332
|
| 9390 |
+
},
|
| 9391 |
+
{
|
| 9392 |
+
"epoch": 0.09573742234351995,
|
| 9393 |
+
"grad_norm": 3412.166015625,
|
| 9394 |
+
"learning_rate": 0.0002,
|
| 9395 |
+
"loss": 0.2213,
|
| 9396 |
+
"step": 1333
|
| 9397 |
+
},
|
| 9398 |
+
{
|
| 9399 |
+
"epoch": 0.09580924336553309,
|
| 9400 |
+
"grad_norm": 671.9053344726562,
|
| 9401 |
+
"learning_rate": 0.0002,
|
| 9402 |
+
"loss": 0.5833,
|
| 9403 |
+
"step": 1334
|
| 9404 |
+
},
|
| 9405 |
+
{
|
| 9406 |
+
"epoch": 0.09588106438754623,
|
| 9407 |
+
"grad_norm": 921.5567626953125,
|
| 9408 |
+
"learning_rate": 0.0002,
|
| 9409 |
+
"loss": 3.0773,
|
| 9410 |
+
"step": 1335
|
| 9411 |
+
},
|
| 9412 |
+
{
|
| 9413 |
+
"epoch": 0.09595288540955937,
|
| 9414 |
+
"grad_norm": 285.1908264160156,
|
| 9415 |
+
"learning_rate": 0.0002,
|
| 9416 |
+
"loss": 5.836,
|
| 9417 |
+
"step": 1336
|
| 9418 |
+
},
|
| 9419 |
+
{
|
| 9420 |
+
"epoch": 0.09602470643157252,
|
| 9421 |
+
"grad_norm": 849.9166259765625,
|
| 9422 |
+
"learning_rate": 0.0002,
|
| 9423 |
+
"loss": 3.8551,
|
| 9424 |
+
"step": 1337
|
| 9425 |
+
},
|
| 9426 |
+
{
|
| 9427 |
+
"epoch": 0.09609652745358567,
|
| 9428 |
+
"grad_norm": 164.85279846191406,
|
| 9429 |
+
"learning_rate": 0.0002,
|
| 9430 |
+
"loss": 2.9356,
|
| 9431 |
+
"step": 1338
|
| 9432 |
+
},
|
| 9433 |
+
{
|
| 9434 |
+
"epoch": 0.09616834847559881,
|
| 9435 |
+
"grad_norm": 124.32804870605469,
|
| 9436 |
+
"learning_rate": 0.0002,
|
| 9437 |
+
"loss": 1.8263,
|
| 9438 |
+
"step": 1339
|
| 9439 |
+
},
|
| 9440 |
+
{
|
| 9441 |
+
"epoch": 0.09624016949761195,
|
| 9442 |
+
"grad_norm": 247.00018310546875,
|
| 9443 |
+
"learning_rate": 0.0002,
|
| 9444 |
+
"loss": 1.0235,
|
| 9445 |
+
"step": 1340
|
| 9446 |
+
},
|
| 9447 |
+
{
|
| 9448 |
+
"epoch": 0.09631199051962509,
|
| 9449 |
+
"grad_norm": 74.94400787353516,
|
| 9450 |
+
"learning_rate": 0.0002,
|
| 9451 |
+
"loss": 0.7707,
|
| 9452 |
+
"step": 1341
|
| 9453 |
+
},
|
| 9454 |
+
{
|
| 9455 |
+
"epoch": 0.09638381154163823,
|
| 9456 |
+
"grad_norm": 94.11992645263672,
|
| 9457 |
+
"learning_rate": 0.0002,
|
| 9458 |
+
"loss": 0.3988,
|
| 9459 |
+
"step": 1342
|
| 9460 |
+
},
|
| 9461 |
+
{
|
| 9462 |
+
"epoch": 0.09645563256365138,
|
| 9463 |
+
"grad_norm": 76.706298828125,
|
| 9464 |
+
"learning_rate": 0.0002,
|
| 9465 |
+
"loss": 0.3303,
|
| 9466 |
+
"step": 1343
|
| 9467 |
+
},
|
| 9468 |
+
{
|
| 9469 |
+
"epoch": 0.09652745358566453,
|
| 9470 |
+
"grad_norm": 57.74739074707031,
|
| 9471 |
+
"learning_rate": 0.0002,
|
| 9472 |
+
"loss": 0.2427,
|
| 9473 |
+
"step": 1344
|
| 9474 |
+
},
|
| 9475 |
+
{
|
| 9476 |
+
"epoch": 0.09659927460767767,
|
| 9477 |
+
"grad_norm": 19.03093147277832,
|
| 9478 |
+
"learning_rate": 0.0002,
|
| 9479 |
+
"loss": 0.1481,
|
| 9480 |
+
"step": 1345
|
| 9481 |
+
},
|
| 9482 |
+
{
|
| 9483 |
+
"epoch": 0.09667109562969081,
|
| 9484 |
+
"grad_norm": 42.87152862548828,
|
| 9485 |
+
"learning_rate": 0.0002,
|
| 9486 |
+
"loss": 0.1765,
|
| 9487 |
+
"step": 1346
|
| 9488 |
+
},
|
| 9489 |
+
{
|
| 9490 |
+
"epoch": 0.09674291665170395,
|
| 9491 |
+
"grad_norm": 36.97148895263672,
|
| 9492 |
+
"learning_rate": 0.0002,
|
| 9493 |
+
"loss": 0.1738,
|
| 9494 |
+
"step": 1347
|
| 9495 |
+
},
|
| 9496 |
+
{
|
| 9497 |
+
"epoch": 0.0968147376737171,
|
| 9498 |
+
"grad_norm": 7.128145694732666,
|
| 9499 |
+
"learning_rate": 0.0002,
|
| 9500 |
+
"loss": 0.0419,
|
| 9501 |
+
"step": 1348
|
| 9502 |
+
},
|
| 9503 |
+
{
|
| 9504 |
+
"epoch": 0.09688655869573023,
|
| 9505 |
+
"grad_norm": 13.10932731628418,
|
| 9506 |
+
"learning_rate": 0.0002,
|
| 9507 |
+
"loss": 0.033,
|
| 9508 |
+
"step": 1349
|
| 9509 |
+
},
|
| 9510 |
+
{
|
| 9511 |
+
"epoch": 0.09695837971774339,
|
| 9512 |
+
"grad_norm": 22.414165496826172,
|
| 9513 |
+
"learning_rate": 0.0002,
|
| 9514 |
+
"loss": 0.0831,
|
| 9515 |
+
"step": 1350
|
| 9516 |
+
},
|
| 9517 |
+
{
|
| 9518 |
+
"epoch": 0.09703020073975653,
|
| 9519 |
+
"grad_norm": 1544.4285888671875,
|
| 9520 |
+
"learning_rate": 0.0002,
|
| 9521 |
+
"loss": 7.3493,
|
| 9522 |
+
"step": 1351
|
| 9523 |
+
},
|
| 9524 |
+
{
|
| 9525 |
+
"epoch": 0.09710202176176967,
|
| 9526 |
+
"grad_norm": 1028.979248046875,
|
| 9527 |
+
"learning_rate": 0.0002,
|
| 9528 |
+
"loss": 6.2821,
|
| 9529 |
+
"step": 1352
|
| 9530 |
+
},
|
| 9531 |
+
{
|
| 9532 |
+
"epoch": 0.09717384278378281,
|
| 9533 |
+
"grad_norm": 869.6739501953125,
|
| 9534 |
+
"learning_rate": 0.0002,
|
| 9535 |
+
"loss": 3.264,
|
| 9536 |
+
"step": 1353
|
| 9537 |
+
},
|
| 9538 |
+
{
|
| 9539 |
+
"epoch": 0.09724566380579595,
|
| 9540 |
+
"grad_norm": 761.1847534179688,
|
| 9541 |
+
"learning_rate": 0.0002,
|
| 9542 |
+
"loss": 2.33,
|
| 9543 |
+
"step": 1354
|
| 9544 |
+
},
|
| 9545 |
+
{
|
| 9546 |
+
"epoch": 0.0973174848278091,
|
| 9547 |
+
"grad_norm": 276.9541320800781,
|
| 9548 |
+
"learning_rate": 0.0002,
|
| 9549 |
+
"loss": 1.3614,
|
| 9550 |
+
"step": 1355
|
| 9551 |
+
},
|
| 9552 |
+
{
|
| 9553 |
+
"epoch": 0.09738930584982225,
|
| 9554 |
+
"grad_norm": 117.37969970703125,
|
| 9555 |
+
"learning_rate": 0.0002,
|
| 9556 |
+
"loss": 0.6368,
|
| 9557 |
+
"step": 1356
|
| 9558 |
+
},
|
| 9559 |
+
{
|
| 9560 |
+
"epoch": 0.09746112687183539,
|
| 9561 |
+
"grad_norm": 82.56239318847656,
|
| 9562 |
+
"learning_rate": 0.0002,
|
| 9563 |
+
"loss": 0.3213,
|
| 9564 |
+
"step": 1357
|
| 9565 |
+
},
|
| 9566 |
+
{
|
| 9567 |
+
"epoch": 0.09753294789384853,
|
| 9568 |
+
"grad_norm": 43.320228576660156,
|
| 9569 |
+
"learning_rate": 0.0002,
|
| 9570 |
+
"loss": 0.2298,
|
| 9571 |
+
"step": 1358
|
| 9572 |
+
},
|
| 9573 |
+
{
|
| 9574 |
+
"epoch": 0.09760476891586167,
|
| 9575 |
+
"grad_norm": 20.17310333251953,
|
| 9576 |
+
"learning_rate": 0.0002,
|
| 9577 |
+
"loss": 0.1338,
|
| 9578 |
+
"step": 1359
|
| 9579 |
+
},
|
| 9580 |
+
{
|
| 9581 |
+
"epoch": 0.09767658993787481,
|
| 9582 |
+
"grad_norm": 11.756632804870605,
|
| 9583 |
+
"learning_rate": 0.0002,
|
| 9584 |
+
"loss": 0.0363,
|
| 9585 |
+
"step": 1360
|
| 9586 |
+
},
|
| 9587 |
+
{
|
| 9588 |
+
"epoch": 0.09774841095988795,
|
| 9589 |
+
"grad_norm": 2.904691457748413,
|
| 9590 |
+
"learning_rate": 0.0002,
|
| 9591 |
+
"loss": 0.0041,
|
| 9592 |
+
"step": 1361
|
| 9593 |
+
},
|
| 9594 |
+
{
|
| 9595 |
+
"epoch": 0.0978202319819011,
|
| 9596 |
+
"grad_norm": 4.21614408493042,
|
| 9597 |
+
"learning_rate": 0.0002,
|
| 9598 |
+
"loss": 0.0053,
|
| 9599 |
+
"step": 1362
|
| 9600 |
+
},
|
| 9601 |
+
{
|
| 9602 |
+
"epoch": 0.09789205300391425,
|
| 9603 |
+
"grad_norm": 54.37326431274414,
|
| 9604 |
+
"learning_rate": 0.0002,
|
| 9605 |
+
"loss": 0.0113,
|
| 9606 |
+
"step": 1363
|
| 9607 |
+
},
|
| 9608 |
+
{
|
| 9609 |
+
"epoch": 0.09796387402592739,
|
| 9610 |
+
"grad_norm": 62.765384674072266,
|
| 9611 |
+
"learning_rate": 0.0002,
|
| 9612 |
+
"loss": 0.0713,
|
| 9613 |
+
"step": 1364
|
| 9614 |
+
},
|
| 9615 |
+
{
|
| 9616 |
+
"epoch": 0.09803569504794053,
|
| 9617 |
+
"grad_norm": 83.12686920166016,
|
| 9618 |
+
"learning_rate": 0.0002,
|
| 9619 |
+
"loss": 0.0344,
|
| 9620 |
+
"step": 1365
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 0.09810751606995367,
|
| 9624 |
+
"grad_norm": 455.5434875488281,
|
| 9625 |
+
"learning_rate": 0.0002,
|
| 9626 |
+
"loss": 0.0518,
|
| 9627 |
+
"step": 1366
|
| 9628 |
+
},
|
| 9629 |
+
{
|
| 9630 |
+
"epoch": 0.09817933709196681,
|
| 9631 |
+
"grad_norm": 86.92057037353516,
|
| 9632 |
+
"learning_rate": 0.0002,
|
| 9633 |
+
"loss": 0.0511,
|
| 9634 |
+
"step": 1367
|
| 9635 |
+
},
|
| 9636 |
+
{
|
| 9637 |
+
"epoch": 0.09825115811397996,
|
| 9638 |
+
"grad_norm": 26.04575538635254,
|
| 9639 |
+
"learning_rate": 0.0002,
|
| 9640 |
+
"loss": 0.0476,
|
| 9641 |
+
"step": 1368
|
| 9642 |
+
},
|
| 9643 |
+
{
|
| 9644 |
+
"epoch": 0.09832297913599311,
|
| 9645 |
+
"grad_norm": 113.25431060791016,
|
| 9646 |
+
"learning_rate": 0.0002,
|
| 9647 |
+
"loss": 0.0259,
|
| 9648 |
+
"step": 1369
|
| 9649 |
+
},
|
| 9650 |
+
{
|
| 9651 |
+
"epoch": 0.09839480015800625,
|
| 9652 |
+
"grad_norm": 675.1182861328125,
|
| 9653 |
+
"learning_rate": 0.0002,
|
| 9654 |
+
"loss": 0.051,
|
| 9655 |
+
"step": 1370
|
| 9656 |
+
},
|
| 9657 |
+
{
|
| 9658 |
+
"epoch": 0.09846662118001939,
|
| 9659 |
+
"grad_norm": 330.5151672363281,
|
| 9660 |
+
"learning_rate": 0.0002,
|
| 9661 |
+
"loss": 0.0303,
|
| 9662 |
+
"step": 1371
|
| 9663 |
+
},
|
| 9664 |
+
{
|
| 9665 |
+
"epoch": 0.09853844220203253,
|
| 9666 |
+
"grad_norm": 151.39495849609375,
|
| 9667 |
+
"learning_rate": 0.0002,
|
| 9668 |
+
"loss": 0.0614,
|
| 9669 |
+
"step": 1372
|
| 9670 |
+
},
|
| 9671 |
+
{
|
| 9672 |
+
"epoch": 0.09861026322404567,
|
| 9673 |
+
"grad_norm": 447.9998474121094,
|
| 9674 |
+
"learning_rate": 0.0002,
|
| 9675 |
+
"loss": 0.1527,
|
| 9676 |
+
"step": 1373
|
| 9677 |
+
},
|
| 9678 |
+
{
|
| 9679 |
+
"epoch": 0.09868208424605882,
|
| 9680 |
+
"grad_norm": 638.3504028320312,
|
| 9681 |
+
"learning_rate": 0.0002,
|
| 9682 |
+
"loss": 0.2173,
|
| 9683 |
+
"step": 1374
|
| 9684 |
+
},
|
| 9685 |
+
{
|
| 9686 |
+
"epoch": 0.09875390526807197,
|
| 9687 |
+
"grad_norm": 1721.16015625,
|
| 9688 |
+
"learning_rate": 0.0002,
|
| 9689 |
+
"loss": 0.5062,
|
| 9690 |
+
"step": 1375
|
| 9691 |
+
},
|
| 9692 |
+
{
|
| 9693 |
+
"epoch": 0.09882572629008511,
|
| 9694 |
+
"grad_norm": 1642.75634765625,
|
| 9695 |
+
"learning_rate": 0.0002,
|
| 9696 |
+
"loss": 0.5763,
|
| 9697 |
+
"step": 1376
|
| 9698 |
+
},
|
| 9699 |
+
{
|
| 9700 |
+
"epoch": 0.09889754731209825,
|
| 9701 |
+
"grad_norm": 9905.47265625,
|
| 9702 |
+
"learning_rate": 0.0002,
|
| 9703 |
+
"loss": 0.7611,
|
| 9704 |
+
"step": 1377
|
| 9705 |
+
},
|
| 9706 |
+
{
|
| 9707 |
+
"epoch": 0.0989693683341114,
|
| 9708 |
+
"grad_norm": 2007.0882568359375,
|
| 9709 |
+
"learning_rate": 0.0002,
|
| 9710 |
+
"loss": 1.2408,
|
| 9711 |
+
"step": 1378
|
| 9712 |
+
},
|
| 9713 |
+
{
|
| 9714 |
+
"epoch": 0.09904118935612453,
|
| 9715 |
+
"grad_norm": 832.2088623046875,
|
| 9716 |
+
"learning_rate": 0.0002,
|
| 9717 |
+
"loss": 1.2982,
|
| 9718 |
+
"step": 1379
|
| 9719 |
+
},
|
| 9720 |
+
{
|
| 9721 |
+
"epoch": 0.09911301037813768,
|
| 9722 |
+
"grad_norm": 4193.06640625,
|
| 9723 |
+
"learning_rate": 0.0002,
|
| 9724 |
+
"loss": 1.1366,
|
| 9725 |
+
"step": 1380
|
| 9726 |
+
},
|
| 9727 |
+
{
|
| 9728 |
+
"epoch": 0.09918483140015083,
|
| 9729 |
+
"grad_norm": 122.19583892822266,
|
| 9730 |
+
"learning_rate": 0.0002,
|
| 9731 |
+
"loss": 0.708,
|
| 9732 |
+
"step": 1381
|
| 9733 |
+
},
|
| 9734 |
+
{
|
| 9735 |
+
"epoch": 0.09925665242216397,
|
| 9736 |
+
"grad_norm": 3087.454833984375,
|
| 9737 |
+
"learning_rate": 0.0002,
|
| 9738 |
+
"loss": 0.6747,
|
| 9739 |
+
"step": 1382
|
| 9740 |
+
},
|
| 9741 |
+
{
|
| 9742 |
+
"epoch": 0.09932847344417711,
|
| 9743 |
+
"grad_norm": 434.3843994140625,
|
| 9744 |
+
"learning_rate": 0.0002,
|
| 9745 |
+
"loss": 0.6661,
|
| 9746 |
+
"step": 1383
|
| 9747 |
+
},
|
| 9748 |
+
{
|
| 9749 |
+
"epoch": 0.09940029446619025,
|
| 9750 |
+
"grad_norm": 109.60822296142578,
|
| 9751 |
+
"learning_rate": 0.0002,
|
| 9752 |
+
"loss": 0.6208,
|
| 9753 |
+
"step": 1384
|
| 9754 |
+
},
|
| 9755 |
+
{
|
| 9756 |
+
"epoch": 0.0994721154882034,
|
| 9757 |
+
"grad_norm": 212.38809204101562,
|
| 9758 |
+
"learning_rate": 0.0002,
|
| 9759 |
+
"loss": 1.7644,
|
| 9760 |
+
"step": 1385
|
| 9761 |
+
},
|
| 9762 |
+
{
|
| 9763 |
+
"epoch": 0.09954393651021654,
|
| 9764 |
+
"grad_norm": 495.2514343261719,
|
| 9765 |
+
"learning_rate": 0.0002,
|
| 9766 |
+
"loss": 1.2185,
|
| 9767 |
+
"step": 1386
|
| 9768 |
+
},
|
| 9769 |
+
{
|
| 9770 |
+
"epoch": 0.09961575753222969,
|
| 9771 |
+
"grad_norm": 230.80685424804688,
|
| 9772 |
+
"learning_rate": 0.0002,
|
| 9773 |
+
"loss": 1.1572,
|
| 9774 |
+
"step": 1387
|
| 9775 |
+
},
|
| 9776 |
+
{
|
| 9777 |
+
"epoch": 0.09968757855424283,
|
| 9778 |
+
"grad_norm": 109.29719543457031,
|
| 9779 |
+
"learning_rate": 0.0002,
|
| 9780 |
+
"loss": 1.1146,
|
| 9781 |
+
"step": 1388
|
| 9782 |
+
},
|
| 9783 |
+
{
|
| 9784 |
+
"epoch": 0.09975939957625597,
|
| 9785 |
+
"grad_norm": 97.48197937011719,
|
| 9786 |
+
"learning_rate": 0.0002,
|
| 9787 |
+
"loss": 0.7324,
|
| 9788 |
+
"step": 1389
|
| 9789 |
+
},
|
| 9790 |
+
{
|
| 9791 |
+
"epoch": 0.09983122059826911,
|
| 9792 |
+
"grad_norm": 146.43472290039062,
|
| 9793 |
+
"learning_rate": 0.0002,
|
| 9794 |
+
"loss": 0.7103,
|
| 9795 |
+
"step": 1390
|
| 9796 |
+
},
|
| 9797 |
+
{
|
| 9798 |
+
"epoch": 0.09990304162028225,
|
| 9799 |
+
"grad_norm": 40.64527893066406,
|
| 9800 |
+
"learning_rate": 0.0002,
|
| 9801 |
+
"loss": 0.3192,
|
| 9802 |
+
"step": 1391
|
| 9803 |
+
},
|
| 9804 |
+
{
|
| 9805 |
+
"epoch": 0.0999748626422954,
|
| 9806 |
+
"grad_norm": 28.65498924255371,
|
| 9807 |
+
"learning_rate": 0.0002,
|
| 9808 |
+
"loss": 0.1344,
|
| 9809 |
+
"step": 1392
|
| 9810 |
+
},
|
| 9811 |
+
{
|
| 9812 |
+
"epoch": 0.10004668366430855,
|
| 9813 |
+
"grad_norm": 22.615970611572266,
|
| 9814 |
+
"learning_rate": 0.0002,
|
| 9815 |
+
"loss": 0.055,
|
| 9816 |
+
"step": 1393
|
| 9817 |
+
},
|
| 9818 |
+
{
|
| 9819 |
+
"epoch": 0.10011850468632169,
|
| 9820 |
+
"grad_norm": 15.91791820526123,
|
| 9821 |
+
"learning_rate": 0.0002,
|
| 9822 |
+
"loss": 0.061,
|
| 9823 |
+
"step": 1394
|
| 9824 |
+
},
|
| 9825 |
+
{
|
| 9826 |
+
"epoch": 0.10019032570833483,
|
| 9827 |
+
"grad_norm": 21.31641387939453,
|
| 9828 |
+
"learning_rate": 0.0002,
|
| 9829 |
+
"loss": 0.0616,
|
| 9830 |
+
"step": 1395
|
| 9831 |
+
},
|
| 9832 |
+
{
|
| 9833 |
+
"epoch": 0.10026214673034797,
|
| 9834 |
+
"grad_norm": 7.001412868499756,
|
| 9835 |
+
"learning_rate": 0.0002,
|
| 9836 |
+
"loss": 0.021,
|
| 9837 |
+
"step": 1396
|
| 9838 |
+
},
|
| 9839 |
+
{
|
| 9840 |
+
"epoch": 0.10033396775236111,
|
| 9841 |
+
"grad_norm": 5.590838432312012,
|
| 9842 |
+
"learning_rate": 0.0002,
|
| 9843 |
+
"loss": 0.0064,
|
| 9844 |
+
"step": 1397
|
| 9845 |
+
},
|
| 9846 |
+
{
|
| 9847 |
+
"epoch": 0.10040578877437425,
|
| 9848 |
+
"grad_norm": 13.681070327758789,
|
| 9849 |
+
"learning_rate": 0.0002,
|
| 9850 |
+
"loss": 0.004,
|
| 9851 |
+
"step": 1398
|
| 9852 |
+
},
|
| 9853 |
+
{
|
| 9854 |
+
"epoch": 0.1004776097963874,
|
| 9855 |
+
"grad_norm": 24.74186134338379,
|
| 9856 |
+
"learning_rate": 0.0002,
|
| 9857 |
+
"loss": 0.0127,
|
| 9858 |
+
"step": 1399
|
| 9859 |
+
},
|
| 9860 |
+
{
|
| 9861 |
+
"epoch": 0.10054943081840055,
|
| 9862 |
+
"grad_norm": 8.081853866577148,
|
| 9863 |
+
"learning_rate": 0.0002,
|
| 9864 |
+
"loss": 0.0202,
|
| 9865 |
+
"step": 1400
|
| 9866 |
+
},
|
| 9867 |
+
{
|
| 9868 |
+
"epoch": 0.10054943081840055,
|
| 9869 |
+
"eval_loss": 2.930184841156006,
|
| 9870 |
+
"eval_runtime": 98.1573,
|
| 9871 |
+
"eval_samples_per_second": 5.705,
|
| 9872 |
+
"eval_steps_per_second": 5.705,
|
| 9873 |
+
"step": 1400
|
| 9874 |
}
|
| 9875 |
],
|
| 9876 |
"logging_steps": 1,
|
|
|
|
| 9885 |
"early_stopping_threshold": 0.0
|
| 9886 |
},
|
| 9887 |
"attributes": {
|
| 9888 |
+
"early_stopping_patience_counter": 3
|
| 9889 |
}
|
| 9890 |
},
|
| 9891 |
"TrainerControl": {
|
|
|
|
| 9899 |
"attributes": {}
|
| 9900 |
}
|
| 9901 |
},
|
| 9902 |
+
"total_flos": 2.313389800024965e+17,
|
| 9903 |
"train_batch_size": 1,
|
| 9904 |
"trial_name": null,
|
| 9905 |
"trial_params": null
|