Fanucci committed
Training in progress, step 4800, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:9922e74509fb13efbccd3ba7174c5a1728912ab70e810a006d67050ce25220b2
  size 313820248
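The binary checkpoint artifacts in this commit are stored through Git LFS, so each file's diff only touches the three-line pointer shown above: the spec version, the SHA-256 object id, and the byte size. As a minimal sketch (not part of this repository), one way to confirm that a downloaded blob matches its pointer; the file paths are placeholders:

import hashlib
from pathlib import Path

def lfs_pointer_matches(pointer_path: str, blob_path: str) -> bool:
    # A Git LFS pointer is three "key value" lines: version, oid, size.
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    actual_oid = hashlib.sha256(blob.read_bytes()).hexdigest()
    return actual_oid == expected_oid and blob.stat().st_size == expected_size

# Placeholder paths, for illustration only:
# lfs_pointer_matches("adapter_model.pointer", "last-checkpoint/adapter_model.safetensors")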
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:8ea4f6b1f4f5589cba5faa5a65666cd0b537a9d6b7c63457c480818b8611f23a
  size 159641284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:5db58e25f34baf46282e0add2052064ceb70ad7e4fc3207681eab0aa57153303
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:61c4ee903d44eec8ab3e54808db8a66c241209cc32680ec2de21c4239dc9081f
  size 1064
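Together with trainer_state.json below, these four binaries (adapter weights, optimizer state, RNG state, LR-scheduler state) are the pieces a Hugging Face Trainer checkpoint normally needs to resume a run exactly where it stopped. A sketch under that assumption; trainer stands for an already-configured transformers.Trainer and is not part of this commit:

# Sketch only: `trainer` is an assumed, already-built transformers.Trainer instance.
# resume_from_checkpoint restores the weights, optimizer, scheduler, RNG state,
# and global step recorded in the checkpoint directory shown above.
trainer.train(resume_from_checkpoint="last-checkpoint")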
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.
- "best_model_checkpoint": "miner_id_24/checkpoint-
- "epoch": 0.
+ "best_metric": 0.3833869695663452,
+ "best_model_checkpoint": "miner_id_24/checkpoint-4800",
+ "epoch": 0.301644907385587,
  "eval_steps": 150,
- "global_step":
+ "global_step": 4800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -32813,6 +32813,1064 @@
  "eval_samples_per_second": 14.182,
  "eval_steps_per_second": 7.091,
  "step": 4650
+ },
+ {
+ "epoch": 0.2922813467188261,
+ "grad_norm": 2.8753435611724854,
+ "learning_rate": 4.771177875110389e-07,
+ "loss": 0.5849,
+ "step": 4651
+ },
+ … (per-step log entries for steps 4652–4799 continue in the same format: "epoch", "grad_norm", "learning_rate", "loss", "step") …
+ {
+ "epoch": 0.301644907385587,
+ "grad_norm": 2.3321471214294434,
+ "learning_rate": 0.0,
+ "loss": 0.3595,
+ "step": 4800
+ },
+ {
+ "epoch": 0.301644907385587,
+ "eval_loss": 0.3833869695663452,
+ "eval_runtime": 352.5884,
+ "eval_samples_per_second": 14.181,
+ "eval_steps_per_second": 7.09,
+ "step": 4800
  }
  ],
  "logging_steps": 1,
@@ -32836,12 +33894,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop":
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 9.
+ "total_flos": 9.409445344460145e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
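The fields changed in this diff ("best_metric", "best_model_checkpoint", "epoch", "global_step", "should_training_stop", "total_flos") sit at the top level of trainer_state.json, and the per-step records shown above live in its "log_history" list, so the state of the finished run can be inspected without loading any weights. A small sketch; the local path is illustrative:

import json

# Illustrative path to the file changed above.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])            # 4800 in this commit
print(state["best_metric"])            # 0.3833869695663452, the step-4800 eval_loss
print(state["best_model_checkpoint"])  # "miner_id_24/checkpoint-4800"

# The last log_history entry is the final evaluation record at step 4800.
print(state["log_history"][-1])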