Training in progress, step 700, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step700/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step700/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step700/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step700/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +727 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13648688
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15078b350dceb966b20c8709542ebf0e64b3e9a4c0e2319cdaec4f9c5530bac6
|
| 3 |
size 13648688
|
last-checkpoint/global_step700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d753f240c6d44b4bbe0556059d07f11e525bb5e9db9c3f9f93ad5e62c7229d8b
|
| 3 |
+
size 20450800
|
last-checkpoint/global_step700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14db1f831000826974ad5f792ab00cc773e4701c955f34d50943cc8bf79f0528
|
| 3 |
+
size 20450800
|
last-checkpoint/global_step700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79468a097847f015aa4935f8c165d90349322e51d0721720db234d01ed6b2d13
|
| 3 |
+
size 20450800
|
last-checkpoint/global_step700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36d4083d52ad8ada7eb47d0557d374a79a41dacbad7e5613ad40f9ee07870048
|
| 3 |
+
size 20450800
|
last-checkpoint/global_step700/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9000f9e4de8903bf33637a3164d7047c80d16b19389259040cc5dc4f48da333d
|
| 3 |
+
size 152238
|
last-checkpoint/global_step700/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eda5edf08baa6742371faab8836bbaaaefb59c558bf7648e07471d1f9cb94572
|
| 3 |
+
size 152238
|
last-checkpoint/global_step700/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:066effbb5600bcd5a6257c2386143b9784c2e3055c47b8e8155cda1fef9ad1b2
|
| 3 |
+
size 152238
|
last-checkpoint/global_step700/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:510c92c97451d4269f524888663e8c58c9f65608affe7d5aefed5707dfabece1
|
| 3 |
+
size 152238
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step700
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08ee93655f035f40cef98d94e21df0215201bfd9c2fd009c63503f74d4bd0676
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f9350b4bfefd5190b618e0103ff8128fab616f2df08e300e5789f194a7e25b8
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd0a399dabcc87f1904a1f24d9d7781d4c2d3c109c95dd2958fca743902bd75c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e90189ce66cbbdd26dcd499b49b05660c650805c2cfc5e25340f61c20bbb952
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed875039ee3baaee6a245c8988a3754c26fb7f9e800cc58167646a8642969266
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 40,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4399,6 +4399,730 @@
|
|
| 4399 |
"eval_samples_per_second": 2.127,
|
| 4400 |
"eval_steps_per_second": 0.17,
|
| 4401 |
"step": 600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4402 |
}
|
| 4403 |
],
|
| 4404 |
"logging_steps": 1,
|
|
@@ -4418,7 +5142,7 @@
|
|
| 4418 |
"attributes": {}
|
| 4419 |
}
|
| 4420 |
},
|
| 4421 |
-
"total_flos":
|
| 4422 |
"train_batch_size": 4,
|
| 4423 |
"trial_name": null,
|
| 4424 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.8607439286812173,
|
| 5 |
"eval_steps": 40,
|
| 6 |
+
"global_step": 700,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4399 |
"eval_samples_per_second": 2.127,
|
| 4400 |
"eval_steps_per_second": 0.17,
|
| 4401 |
"step": 600
|
| 4402 |
+
},
|
| 4403 |
+
{
|
| 4404 |
+
"epoch": 0.7390101444820166,
|
| 4405 |
+
"grad_norm": 0.2943806410067254,
|
| 4406 |
+
"learning_rate": 3.8712690512345555e-06,
|
| 4407 |
+
"loss": 0.1728,
|
| 4408 |
+
"step": 601
|
| 4409 |
+
},
|
| 4410 |
+
{
|
| 4411 |
+
"epoch": 0.7402397786658469,
|
| 4412 |
+
"grad_norm": 0.42925890423626006,
|
| 4413 |
+
"learning_rate": 3.837366411839114e-06,
|
| 4414 |
+
"loss": 0.1948,
|
| 4415 |
+
"step": 602
|
| 4416 |
+
},
|
| 4417 |
+
{
|
| 4418 |
+
"epoch": 0.7414694128496773,
|
| 4419 |
+
"grad_norm": 0.3527421902997394,
|
| 4420 |
+
"learning_rate": 3.8035775956118416e-06,
|
| 4421 |
+
"loss": 0.1413,
|
| 4422 |
+
"step": 603
|
| 4423 |
+
},
|
| 4424 |
+
{
|
| 4425 |
+
"epoch": 0.7426990470335075,
|
| 4426 |
+
"grad_norm": 0.4175876107718813,
|
| 4427 |
+
"learning_rate": 3.7699032266284863e-06,
|
| 4428 |
+
"loss": 0.2727,
|
| 4429 |
+
"step": 604
|
| 4430 |
+
},
|
| 4431 |
+
{
|
| 4432 |
+
"epoch": 0.7439286812173378,
|
| 4433 |
+
"grad_norm": 0.4187920324268778,
|
| 4434 |
+
"learning_rate": 3.736343926850954e-06,
|
| 4435 |
+
"loss": 0.1588,
|
| 4436 |
+
"step": 605
|
| 4437 |
+
},
|
| 4438 |
+
{
|
| 4439 |
+
"epoch": 0.7451583154011682,
|
| 4440 |
+
"grad_norm": 0.3852760473136735,
|
| 4441 |
+
"learning_rate": 3.702900316115836e-06,
|
| 4442 |
+
"loss": 0.174,
|
| 4443 |
+
"step": 606
|
| 4444 |
+
},
|
| 4445 |
+
{
|
| 4446 |
+
"epoch": 0.7463879495849984,
|
| 4447 |
+
"grad_norm": 0.42823407651531814,
|
| 4448 |
+
"learning_rate": 3.6695730121229734e-06,
|
| 4449 |
+
"loss": 0.1938,
|
| 4450 |
+
"step": 607
|
| 4451 |
+
},
|
| 4452 |
+
{
|
| 4453 |
+
"epoch": 0.7476175837688288,
|
| 4454 |
+
"grad_norm": 0.3509868875989032,
|
| 4455 |
+
"learning_rate": 3.6363626304240185e-06,
|
| 4456 |
+
"loss": 0.1475,
|
| 4457 |
+
"step": 608
|
| 4458 |
+
},
|
| 4459 |
+
{
|
| 4460 |
+
"epoch": 0.7488472179526591,
|
| 4461 |
+
"grad_norm": 0.2971798682387744,
|
| 4462 |
+
"learning_rate": 3.6032697844110896e-06,
|
| 4463 |
+
"loss": 0.1767,
|
| 4464 |
+
"step": 609
|
| 4465 |
+
},
|
| 4466 |
+
{
|
| 4467 |
+
"epoch": 0.7500768521364894,
|
| 4468 |
+
"grad_norm": 0.6072125452251376,
|
| 4469 |
+
"learning_rate": 3.5702950853054284e-06,
|
| 4470 |
+
"loss": 0.1699,
|
| 4471 |
+
"step": 610
|
| 4472 |
+
},
|
| 4473 |
+
{
|
| 4474 |
+
"epoch": 0.7513064863203197,
|
| 4475 |
+
"grad_norm": 0.42164026185503256,
|
| 4476 |
+
"learning_rate": 3.5374391421461273e-06,
|
| 4477 |
+
"loss": 0.1412,
|
| 4478 |
+
"step": 611
|
| 4479 |
+
},
|
| 4480 |
+
{
|
| 4481 |
+
"epoch": 0.75253612050415,
|
| 4482 |
+
"grad_norm": 0.3486983507433236,
|
| 4483 |
+
"learning_rate": 3.5047025617788578e-06,
|
| 4484 |
+
"loss": 0.1936,
|
| 4485 |
+
"step": 612
|
| 4486 |
+
},
|
| 4487 |
+
{
|
| 4488 |
+
"epoch": 0.7537657546879803,
|
| 4489 |
+
"grad_norm": 0.4729724505869417,
|
| 4490 |
+
"learning_rate": 3.4720859488446744e-06,
|
| 4491 |
+
"loss": 0.2232,
|
| 4492 |
+
"step": 613
|
| 4493 |
+
},
|
| 4494 |
+
{
|
| 4495 |
+
"epoch": 0.7549953888718106,
|
| 4496 |
+
"grad_norm": 0.4712358882570717,
|
| 4497 |
+
"learning_rate": 3.4395899057688575e-06,
|
| 4498 |
+
"loss": 0.1957,
|
| 4499 |
+
"step": 614
|
| 4500 |
+
},
|
| 4501 |
+
{
|
| 4502 |
+
"epoch": 0.756225023055641,
|
| 4503 |
+
"grad_norm": 0.30705322198688406,
|
| 4504 |
+
"learning_rate": 3.407215032749763e-06,
|
| 4505 |
+
"loss": 0.1771,
|
| 4506 |
+
"step": 615
|
| 4507 |
+
},
|
| 4508 |
+
{
|
| 4509 |
+
"epoch": 0.7574546572394713,
|
| 4510 |
+
"grad_norm": 0.4492814208789423,
|
| 4511 |
+
"learning_rate": 3.374961927747751e-06,
|
| 4512 |
+
"loss": 0.2017,
|
| 4513 |
+
"step": 616
|
| 4514 |
+
},
|
| 4515 |
+
{
|
| 4516 |
+
"epoch": 0.7586842914233015,
|
| 4517 |
+
"grad_norm": 0.4380941744123555,
|
| 4518 |
+
"learning_rate": 3.342831186474149e-06,
|
| 4519 |
+
"loss": 0.2032,
|
| 4520 |
+
"step": 617
|
| 4521 |
+
},
|
| 4522 |
+
{
|
| 4523 |
+
"epoch": 0.7599139256071319,
|
| 4524 |
+
"grad_norm": 0.3711476811320281,
|
| 4525 |
+
"learning_rate": 3.31082340238023e-06,
|
| 4526 |
+
"loss": 0.139,
|
| 4527 |
+
"step": 618
|
| 4528 |
+
},
|
| 4529 |
+
{
|
| 4530 |
+
"epoch": 0.7611435597909622,
|
| 4531 |
+
"grad_norm": 0.4366197359235773,
|
| 4532 |
+
"learning_rate": 3.27893916664626e-06,
|
| 4533 |
+
"loss": 0.1726,
|
| 4534 |
+
"step": 619
|
| 4535 |
+
},
|
| 4536 |
+
{
|
| 4537 |
+
"epoch": 0.7623731939747925,
|
| 4538 |
+
"grad_norm": 0.3831073094979708,
|
| 4539 |
+
"learning_rate": 3.2471790681705928e-06,
|
| 4540 |
+
"loss": 0.1734,
|
| 4541 |
+
"step": 620
|
| 4542 |
+
},
|
| 4543 |
+
{
|
| 4544 |
+
"epoch": 0.7636028281586228,
|
| 4545 |
+
"grad_norm": 0.3832625976759797,
|
| 4546 |
+
"learning_rate": 3.215543693558769e-06,
|
| 4547 |
+
"loss": 0.1326,
|
| 4548 |
+
"step": 621
|
| 4549 |
+
},
|
| 4550 |
+
{
|
| 4551 |
+
"epoch": 0.7648324623424532,
|
| 4552 |
+
"grad_norm": 0.4637885564290134,
|
| 4553 |
+
"learning_rate": 3.1840336271126935e-06,
|
| 4554 |
+
"loss": 0.213,
|
| 4555 |
+
"step": 622
|
| 4556 |
+
},
|
| 4557 |
+
{
|
| 4558 |
+
"epoch": 0.7660620965262834,
|
| 4559 |
+
"grad_norm": 0.5509391377682509,
|
| 4560 |
+
"learning_rate": 3.152649450819852e-06,
|
| 4561 |
+
"loss": 0.202,
|
| 4562 |
+
"step": 623
|
| 4563 |
+
},
|
| 4564 |
+
{
|
| 4565 |
+
"epoch": 0.7672917307101137,
|
| 4566 |
+
"grad_norm": 0.4604352454314464,
|
| 4567 |
+
"learning_rate": 3.1213917443425456e-06,
|
| 4568 |
+
"loss": 0.2395,
|
| 4569 |
+
"step": 624
|
| 4570 |
+
},
|
| 4571 |
+
{
|
| 4572 |
+
"epoch": 0.7685213648939441,
|
| 4573 |
+
"grad_norm": 0.5005650818328251,
|
| 4574 |
+
"learning_rate": 3.0902610850071922e-06,
|
| 4575 |
+
"loss": 0.1712,
|
| 4576 |
+
"step": 625
|
| 4577 |
+
},
|
| 4578 |
+
{
|
| 4579 |
+
"epoch": 0.7697509990777743,
|
| 4580 |
+
"grad_norm": 0.3297795229391836,
|
| 4581 |
+
"learning_rate": 3.0592580477936606e-06,
|
| 4582 |
+
"loss": 0.1249,
|
| 4583 |
+
"step": 626
|
| 4584 |
+
},
|
| 4585 |
+
{
|
| 4586 |
+
"epoch": 0.7709806332616047,
|
| 4587 |
+
"grad_norm": 0.37133417357695125,
|
| 4588 |
+
"learning_rate": 3.0283832053246644e-06,
|
| 4589 |
+
"loss": 0.1496,
|
| 4590 |
+
"step": 627
|
| 4591 |
+
},
|
| 4592 |
+
{
|
| 4593 |
+
"epoch": 0.772210267445435,
|
| 4594 |
+
"grad_norm": 1.0851806228661502,
|
| 4595 |
+
"learning_rate": 2.99763712785516e-06,
|
| 4596 |
+
"loss": 0.1834,
|
| 4597 |
+
"step": 628
|
| 4598 |
+
},
|
| 4599 |
+
{
|
| 4600 |
+
"epoch": 0.7734399016292653,
|
| 4601 |
+
"grad_norm": 0.5871194480383413,
|
| 4602 |
+
"learning_rate": 2.967020383261834e-06,
|
| 4603 |
+
"loss": 0.2054,
|
| 4604 |
+
"step": 629
|
| 4605 |
+
},
|
| 4606 |
+
{
|
| 4607 |
+
"epoch": 0.7746695358130956,
|
| 4608 |
+
"grad_norm": 0.5149728889777226,
|
| 4609 |
+
"learning_rate": 2.9365335370326143e-06,
|
| 4610 |
+
"loss": 0.1972,
|
| 4611 |
+
"step": 630
|
| 4612 |
+
},
|
| 4613 |
+
{
|
| 4614 |
+
"epoch": 0.7758991699969259,
|
| 4615 |
+
"grad_norm": 0.37527398302282,
|
| 4616 |
+
"learning_rate": 2.9061771522562143e-06,
|
| 4617 |
+
"loss": 0.1492,
|
| 4618 |
+
"step": 631
|
| 4619 |
+
},
|
| 4620 |
+
{
|
| 4621 |
+
"epoch": 0.7771288041807562,
|
| 4622 |
+
"grad_norm": 0.4284583342223879,
|
| 4623 |
+
"learning_rate": 2.875951789611734e-06,
|
| 4624 |
+
"loss": 0.1937,
|
| 4625 |
+
"step": 632
|
| 4626 |
+
},
|
| 4627 |
+
{
|
| 4628 |
+
"epoch": 0.7783584383645865,
|
| 4629 |
+
"grad_norm": 0.4328792148070332,
|
| 4630 |
+
"learning_rate": 2.8458580073583262e-06,
|
| 4631 |
+
"loss": 0.1905,
|
| 4632 |
+
"step": 633
|
| 4633 |
+
},
|
| 4634 |
+
{
|
| 4635 |
+
"epoch": 0.7795880725484169,
|
| 4636 |
+
"grad_norm": 0.4067822771383594,
|
| 4637 |
+
"learning_rate": 2.8158963613248437e-06,
|
| 4638 |
+
"loss": 0.2048,
|
| 4639 |
+
"step": 634
|
| 4640 |
+
},
|
| 4641 |
+
{
|
| 4642 |
+
"epoch": 0.7808177067322472,
|
| 4643 |
+
"grad_norm": 0.5475925840409395,
|
| 4644 |
+
"learning_rate": 2.7860674048996174e-06,
|
| 4645 |
+
"loss": 0.2014,
|
| 4646 |
+
"step": 635
|
| 4647 |
+
},
|
| 4648 |
+
{
|
| 4649 |
+
"epoch": 0.7820473409160774,
|
| 4650 |
+
"grad_norm": 0.3714863801891058,
|
| 4651 |
+
"learning_rate": 2.756371689020214e-06,
|
| 4652 |
+
"loss": 0.1597,
|
| 4653 |
+
"step": 636
|
| 4654 |
+
},
|
| 4655 |
+
{
|
| 4656 |
+
"epoch": 0.7832769750999078,
|
| 4657 |
+
"grad_norm": 0.45403846500036404,
|
| 4658 |
+
"learning_rate": 2.7268097621632473e-06,
|
| 4659 |
+
"loss": 0.1588,
|
| 4660 |
+
"step": 637
|
| 4661 |
+
},
|
| 4662 |
+
{
|
| 4663 |
+
"epoch": 0.784506609283738,
|
| 4664 |
+
"grad_norm": 0.2750476426300895,
|
| 4665 |
+
"learning_rate": 2.697382170334275e-06,
|
| 4666 |
+
"loss": 0.1456,
|
| 4667 |
+
"step": 638
|
| 4668 |
+
},
|
| 4669 |
+
{
|
| 4670 |
+
"epoch": 0.7857362434675684,
|
| 4671 |
+
"grad_norm": 0.4122155448314921,
|
| 4672 |
+
"learning_rate": 2.6680894570577042e-06,
|
| 4673 |
+
"loss": 0.165,
|
| 4674 |
+
"step": 639
|
| 4675 |
+
},
|
| 4676 |
+
{
|
| 4677 |
+
"epoch": 0.7869658776513987,
|
| 4678 |
+
"grad_norm": 0.44104871745668295,
|
| 4679 |
+
"learning_rate": 2.638932163366742e-06,
|
| 4680 |
+
"loss": 0.1883,
|
| 4681 |
+
"step": 640
|
| 4682 |
+
},
|
| 4683 |
+
{
|
| 4684 |
+
"epoch": 0.7869658776513987,
|
| 4685 |
+
"eval_accuracy": 0.8021390374331551,
|
| 4686 |
+
"eval_f1": 0.5066666666666667,
|
| 4687 |
+
"eval_loss": 0.42875000834465027,
|
| 4688 |
+
"eval_precision": 0.76,
|
| 4689 |
+
"eval_recall": 0.38,
|
| 4690 |
+
"eval_runtime": 22.3064,
|
| 4691 |
+
"eval_samples_per_second": 2.242,
|
| 4692 |
+
"eval_steps_per_second": 0.179,
|
| 4693 |
+
"step": 640
|
| 4694 |
+
},
|
| 4695 |
+
{
|
| 4696 |
+
"epoch": 0.7881955118352291,
|
| 4697 |
+
"grad_norm": 0.38537966631812437,
|
| 4698 |
+
"learning_rate": 2.6099108277934105e-06,
|
| 4699 |
+
"loss": 0.1942,
|
| 4700 |
+
"step": 641
|
| 4701 |
+
},
|
| 4702 |
+
{
|
| 4703 |
+
"epoch": 0.7894251460190593,
|
| 4704 |
+
"grad_norm": 0.47302017581744826,
|
| 4705 |
+
"learning_rate": 2.581025986358602e-06,
|
| 4706 |
+
"loss": 0.2733,
|
| 4707 |
+
"step": 642
|
| 4708 |
+
},
|
| 4709 |
+
{
|
| 4710 |
+
"epoch": 0.7906547802028896,
|
| 4711 |
+
"grad_norm": 0.4006638675446945,
|
| 4712 |
+
"learning_rate": 2.5522781725621814e-06,
|
| 4713 |
+
"loss": 0.1905,
|
| 4714 |
+
"step": 643
|
| 4715 |
+
},
|
| 4716 |
+
{
|
| 4717 |
+
"epoch": 0.79188441438672,
|
| 4718 |
+
"grad_norm": 0.4264868084266065,
|
| 4719 |
+
"learning_rate": 2.523667917373125e-06,
|
| 4720 |
+
"loss": 0.2047,
|
| 4721 |
+
"step": 644
|
| 4722 |
+
},
|
| 4723 |
+
{
|
| 4724 |
+
"epoch": 0.7931140485705502,
|
| 4725 |
+
"grad_norm": 0.3954441386492838,
|
| 4726 |
+
"learning_rate": 2.4951957492197097e-06,
|
| 4727 |
+
"loss": 0.1377,
|
| 4728 |
+
"step": 645
|
| 4729 |
+
},
|
| 4730 |
+
{
|
| 4731 |
+
"epoch": 0.7943436827543806,
|
| 4732 |
+
"grad_norm": 0.39481889488214283,
|
| 4733 |
+
"learning_rate": 2.4668621939797745e-06,
|
| 4734 |
+
"loss": 0.1402,
|
| 4735 |
+
"step": 646
|
| 4736 |
+
},
|
| 4737 |
+
{
|
| 4738 |
+
"epoch": 0.7955733169382109,
|
| 4739 |
+
"grad_norm": 0.5271696297567287,
|
| 4740 |
+
"learning_rate": 2.438667774970981e-06,
|
| 4741 |
+
"loss": 0.2091,
|
| 4742 |
+
"step": 647
|
| 4743 |
+
},
|
| 4744 |
+
{
|
| 4745 |
+
"epoch": 0.7968029511220412,
|
| 4746 |
+
"grad_norm": 0.40581144727582685,
|
| 4747 |
+
"learning_rate": 2.4106130129411608e-06,
|
| 4748 |
+
"loss": 0.1898,
|
| 4749 |
+
"step": 648
|
| 4750 |
+
},
|
| 4751 |
+
{
|
| 4752 |
+
"epoch": 0.7980325853058715,
|
| 4753 |
+
"grad_norm": 0.4102532645005857,
|
| 4754 |
+
"learning_rate": 2.3826984260587084e-06,
|
| 4755 |
+
"loss": 0.2066,
|
| 4756 |
+
"step": 649
|
| 4757 |
+
},
|
| 4758 |
+
{
|
| 4759 |
+
"epoch": 0.7992622194897018,
|
| 4760 |
+
"grad_norm": 0.388703790445828,
|
| 4761 |
+
"learning_rate": 2.354924529902978e-06,
|
| 4762 |
+
"loss": 0.1987,
|
| 4763 |
+
"step": 650
|
| 4764 |
+
},
|
| 4765 |
+
{
|
| 4766 |
+
"epoch": 0.8004918536735321,
|
| 4767 |
+
"grad_norm": 0.4906618445456134,
|
| 4768 |
+
"learning_rate": 2.327291837454799e-06,
|
| 4769 |
+
"loss": 0.1837,
|
| 4770 |
+
"step": 651
|
| 4771 |
+
},
|
| 4772 |
+
{
|
| 4773 |
+
"epoch": 0.8017214878573624,
|
| 4774 |
+
"grad_norm": 0.37536494595757913,
|
| 4775 |
+
"learning_rate": 2.2998008590869838e-06,
|
| 4776 |
+
"loss": 0.1657,
|
| 4777 |
+
"step": 652
|
| 4778 |
+
},
|
| 4779 |
+
{
|
| 4780 |
+
"epoch": 0.8029511220411928,
|
| 4781 |
+
"grad_norm": 0.3812431916923574,
|
| 4782 |
+
"learning_rate": 2.2724521025548828e-06,
|
| 4783 |
+
"loss": 0.1008,
|
| 4784 |
+
"step": 653
|
| 4785 |
+
},
|
| 4786 |
+
{
|
| 4787 |
+
"epoch": 0.804180756225023,
|
| 4788 |
+
"grad_norm": 0.3734890292027527,
|
| 4789 |
+
"learning_rate": 2.245246072987045e-06,
|
| 4790 |
+
"loss": 0.1343,
|
| 4791 |
+
"step": 654
|
| 4792 |
+
},
|
| 4793 |
+
{
|
| 4794 |
+
"epoch": 0.8054103904088533,
|
| 4795 |
+
"grad_norm": 0.4423063838480555,
|
| 4796 |
+
"learning_rate": 2.2181832728758635e-06,
|
| 4797 |
+
"loss": 0.2222,
|
| 4798 |
+
"step": 655
|
| 4799 |
+
},
|
| 4800 |
+
{
|
| 4801 |
+
"epoch": 0.8066400245926837,
|
| 4802 |
+
"grad_norm": 0.3896545849527162,
|
| 4803 |
+
"learning_rate": 2.191264202068286e-06,
|
| 4804 |
+
"loss": 0.1766,
|
| 4805 |
+
"step": 656
|
| 4806 |
+
},
|
| 4807 |
+
{
|
| 4808 |
+
"epoch": 0.807869658776514,
|
| 4809 |
+
"grad_norm": 0.6024032080378133,
|
| 4810 |
+
"learning_rate": 2.1644893577566118e-06,
|
| 4811 |
+
"loss": 0.231,
|
| 4812 |
+
"step": 657
|
| 4813 |
+
},
|
| 4814 |
+
{
|
| 4815 |
+
"epoch": 0.8090992929603443,
|
| 4816 |
+
"grad_norm": 0.43861748495389236,
|
| 4817 |
+
"learning_rate": 2.137859234469286e-06,
|
| 4818 |
+
"loss": 0.2467,
|
| 4819 |
+
"step": 658
|
| 4820 |
+
},
|
| 4821 |
+
{
|
| 4822 |
+
"epoch": 0.8103289271441746,
|
| 4823 |
+
"grad_norm": 0.37033226791746354,
|
| 4824 |
+
"learning_rate": 2.1113743240617668e-06,
|
| 4825 |
+
"loss": 0.1337,
|
| 4826 |
+
"step": 659
|
| 4827 |
+
},
|
| 4828 |
+
{
|
| 4829 |
+
"epoch": 0.811558561328005,
|
| 4830 |
+
"grad_norm": 0.6398820179734428,
|
| 4831 |
+
"learning_rate": 2.08503511570746e-06,
|
| 4832 |
+
"loss": 0.1954,
|
| 4833 |
+
"step": 660
|
| 4834 |
+
},
|
| 4835 |
+
{
|
| 4836 |
+
"epoch": 0.8127881955118352,
|
| 4837 |
+
"grad_norm": 0.4504933775118792,
|
| 4838 |
+
"learning_rate": 2.058842095888658e-06,
|
| 4839 |
+
"loss": 0.18,
|
| 4840 |
+
"step": 661
|
| 4841 |
+
},
|
| 4842 |
+
{
|
| 4843 |
+
"epoch": 0.8140178296956655,
|
| 4844 |
+
"grad_norm": 0.361212739042047,
|
| 4845 |
+
"learning_rate": 2.0327957483875693e-06,
|
| 4846 |
+
"loss": 0.1489,
|
| 4847 |
+
"step": 662
|
| 4848 |
+
},
|
| 4849 |
+
{
|
| 4850 |
+
"epoch": 0.8152474638794959,
|
| 4851 |
+
"grad_norm": 0.307913369177724,
|
| 4852 |
+
"learning_rate": 2.006896554277388e-06,
|
| 4853 |
+
"loss": 0.1572,
|
| 4854 |
+
"step": 663
|
| 4855 |
+
},
|
| 4856 |
+
{
|
| 4857 |
+
"epoch": 0.8164770980633261,
|
| 4858 |
+
"grad_norm": 0.25426740831645195,
|
| 4859 |
+
"learning_rate": 1.981144991913392e-06,
|
| 4860 |
+
"loss": 0.12,
|
| 4861 |
+
"step": 664
|
| 4862 |
+
},
|
| 4863 |
+
{
|
| 4864 |
+
"epoch": 0.8177067322471565,
|
| 4865 |
+
"grad_norm": 0.3663288109181175,
|
| 4866 |
+
"learning_rate": 1.9555415369241228e-06,
|
| 4867 |
+
"loss": 0.1571,
|
| 4868 |
+
"step": 665
|
| 4869 |
+
},
|
| 4870 |
+
{
|
| 4871 |
+
"epoch": 0.8189363664309868,
|
| 4872 |
+
"grad_norm": 0.41662449029107057,
|
| 4873 |
+
"learning_rate": 1.930086662202589e-06,
|
| 4874 |
+
"loss": 0.1873,
|
| 4875 |
+
"step": 666
|
| 4876 |
+
},
|
| 4877 |
+
{
|
| 4878 |
+
"epoch": 0.820166000614817,
|
| 4879 |
+
"grad_norm": 0.40845173743188795,
|
| 4880 |
+
"learning_rate": 1.9047808378975485e-06,
|
| 4881 |
+
"loss": 0.1534,
|
| 4882 |
+
"step": 667
|
| 4883 |
+
},
|
| 4884 |
+
{
|
| 4885 |
+
"epoch": 0.8213956347986474,
|
| 4886 |
+
"grad_norm": 0.6212434671550456,
|
| 4887 |
+
"learning_rate": 1.8796245314048046e-06,
|
| 4888 |
+
"loss": 0.2374,
|
| 4889 |
+
"step": 668
|
| 4890 |
+
},
|
| 4891 |
+
{
|
| 4892 |
+
"epoch": 0.8226252689824777,
|
| 4893 |
+
"grad_norm": 0.3337054400199707,
|
| 4894 |
+
"learning_rate": 1.8546182073585828e-06,
|
| 4895 |
+
"loss": 0.184,
|
| 4896 |
+
"step": 669
|
| 4897 |
+
},
|
| 4898 |
+
{
|
| 4899 |
+
"epoch": 0.823854903166308,
|
| 4900 |
+
"grad_norm": 0.37408116822647747,
|
| 4901 |
+
"learning_rate": 1.829762327622958e-06,
|
| 4902 |
+
"loss": 0.1627,
|
| 4903 |
+
"step": 670
|
| 4904 |
+
},
|
| 4905 |
+
{
|
| 4906 |
+
"epoch": 0.8250845373501383,
|
| 4907 |
+
"grad_norm": 0.41291954814345744,
|
| 4908 |
+
"learning_rate": 1.805057351283307e-06,
|
| 4909 |
+
"loss": 0.1426,
|
| 4910 |
+
"step": 671
|
| 4911 |
+
},
|
| 4912 |
+
{
|
| 4913 |
+
"epoch": 0.8263141715339687,
|
| 4914 |
+
"grad_norm": 0.6232928915412197,
|
| 4915 |
+
"learning_rate": 1.7805037346378384e-06,
|
| 4916 |
+
"loss": 0.1939,
|
| 4917 |
+
"step": 672
|
| 4918 |
+
},
|
| 4919 |
+
{
|
| 4920 |
+
"epoch": 0.827543805717799,
|
| 4921 |
+
"grad_norm": 0.43962963164293384,
|
| 4922 |
+
"learning_rate": 1.756101931189169e-06,
|
| 4923 |
+
"loss": 0.2049,
|
| 4924 |
+
"step": 673
|
| 4925 |
+
},
|
| 4926 |
+
{
|
| 4927 |
+
"epoch": 0.8287734399016292,
|
| 4928 |
+
"grad_norm": 0.3747672424266052,
|
| 4929 |
+
"learning_rate": 1.7318523916359376e-06,
|
| 4930 |
+
"loss": 0.1644,
|
| 4931 |
+
"step": 674
|
| 4932 |
+
},
|
| 4933 |
+
{
|
| 4934 |
+
"epoch": 0.8300030740854596,
|
| 4935 |
+
"grad_norm": 0.4713865050667868,
|
| 4936 |
+
"learning_rate": 1.7077555638644838e-06,
|
| 4937 |
+
"loss": 0.2924,
|
| 4938 |
+
"step": 675
|
| 4939 |
+
},
|
| 4940 |
+
{
|
| 4941 |
+
"epoch": 0.8312327082692899,
|
| 4942 |
+
"grad_norm": 0.5391745289921438,
|
| 4943 |
+
"learning_rate": 1.6838118929405856e-06,
|
| 4944 |
+
"loss": 0.1767,
|
| 4945 |
+
"step": 676
|
| 4946 |
+
},
|
| 4947 |
+
{
|
| 4948 |
+
"epoch": 0.8324623424531202,
|
| 4949 |
+
"grad_norm": 0.35807178811591905,
|
| 4950 |
+
"learning_rate": 1.660021821101222e-06,
|
| 4951 |
+
"loss": 0.1718,
|
| 4952 |
+
"step": 677
|
| 4953 |
+
},
|
| 4954 |
+
{
|
| 4955 |
+
"epoch": 0.8336919766369505,
|
| 4956 |
+
"grad_norm": 0.5700152695384362,
|
| 4957 |
+
"learning_rate": 1.6363857877464161e-06,
|
| 4958 |
+
"loss": 0.1505,
|
| 4959 |
+
"step": 678
|
| 4960 |
+
},
|
| 4961 |
+
{
|
| 4962 |
+
"epoch": 0.8349216108207809,
|
| 4963 |
+
"grad_norm": 0.521349273286693,
|
| 4964 |
+
"learning_rate": 1.6129042294311227e-06,
|
| 4965 |
+
"loss": 0.1893,
|
| 4966 |
+
"step": 679
|
| 4967 |
+
},
|
| 4968 |
+
{
|
| 4969 |
+
"epoch": 0.8361512450046111,
|
| 4970 |
+
"grad_norm": 0.4881174981503527,
|
| 4971 |
+
"learning_rate": 1.5895775798571523e-06,
|
| 4972 |
+
"loss": 0.2403,
|
| 4973 |
+
"step": 680
|
| 4974 |
+
},
|
| 4975 |
+
{
|
| 4976 |
+
"epoch": 0.8361512450046111,
|
| 4977 |
+
"eval_accuracy": 0.8021390374331551,
|
| 4978 |
+
"eval_f1": 0.5066666666666667,
|
| 4979 |
+
"eval_loss": 0.42875000834465027,
|
| 4980 |
+
"eval_precision": 0.76,
|
| 4981 |
+
"eval_recall": 0.38,
|
| 4982 |
+
"eval_runtime": 23.134,
|
| 4983 |
+
"eval_samples_per_second": 2.161,
|
| 4984 |
+
"eval_steps_per_second": 0.173,
|
| 4985 |
+
"step": 680
|
| 4986 |
+
},
|
| 4987 |
+
{
|
| 4988 |
+
"epoch": 0.8373808791884414,
|
| 4989 |
+
"grad_norm": 0.43157618057929154,
|
| 4990 |
+
"learning_rate": 1.5664062698651706e-06,
|
| 4991 |
+
"loss": 0.1824,
|
| 4992 |
+
"step": 681
|
| 4993 |
+
},
|
| 4994 |
+
{
|
| 4995 |
+
"epoch": 0.8386105133722718,
|
| 4996 |
+
"grad_norm": 0.5760272230077988,
|
| 4997 |
+
"learning_rate": 1.5433907274267357e-06,
|
| 4998 |
+
"loss": 0.2397,
|
| 4999 |
+
"step": 682
|
| 5000 |
+
},
|
| 5001 |
+
{
|
| 5002 |
+
"epoch": 0.839840147556102,
|
| 5003 |
+
"grad_norm": 0.5350905991023048,
|
| 5004 |
+
"learning_rate": 1.5205313776364028e-06,
|
| 5005 |
+
"loss": 0.1892,
|
| 5006 |
+
"step": 683
|
| 5007 |
+
},
|
| 5008 |
+
{
|
| 5009 |
+
"epoch": 0.8410697817399324,
|
| 5010 |
+
"grad_norm": 0.61137934990804,
|
| 5011 |
+
"learning_rate": 1.4978286427038602e-06,
|
| 5012 |
+
"loss": 0.2348,
|
| 5013 |
+
"step": 684
|
| 5014 |
+
},
|
| 5015 |
+
{
|
| 5016 |
+
"epoch": 0.8422994159237627,
|
| 5017 |
+
"grad_norm": 0.4331644305139785,
|
| 5018 |
+
"learning_rate": 1.4752829419461357e-06,
|
| 5019 |
+
"loss": 0.1937,
|
| 5020 |
+
"step": 685
|
| 5021 |
+
},
|
| 5022 |
+
{
|
| 5023 |
+
"epoch": 0.8435290501075929,
|
| 5024 |
+
"grad_norm": 0.3640781076289279,
|
| 5025 |
+
"learning_rate": 1.4528946917798603e-06,
|
| 5026 |
+
"loss": 0.1962,
|
| 5027 |
+
"step": 686
|
| 5028 |
+
},
|
| 5029 |
+
{
|
| 5030 |
+
"epoch": 0.8447586842914233,
|
| 5031 |
+
"grad_norm": 0.4244637100420945,
|
| 5032 |
+
"learning_rate": 1.4306643057135638e-06,
|
| 5033 |
+
"loss": 0.193,
|
| 5034 |
+
"step": 687
|
| 5035 |
+
},
|
| 5036 |
+
{
|
| 5037 |
+
"epoch": 0.8459883184752536,
|
| 5038 |
+
"grad_norm": 0.27253213925489794,
|
| 5039 |
+
"learning_rate": 1.4085921943400416e-06,
|
| 5040 |
+
"loss": 0.1582,
|
| 5041 |
+
"step": 688
|
| 5042 |
+
},
|
| 5043 |
+
{
|
| 5044 |
+
"epoch": 0.847217952659084,
|
| 5045 |
+
"grad_norm": 0.7026492760941759,
|
| 5046 |
+
"learning_rate": 1.3866787653287804e-06,
|
| 5047 |
+
"loss": 0.2727,
|
| 5048 |
+
"step": 689
|
| 5049 |
+
},
|
| 5050 |
+
{
|
| 5051 |
+
"epoch": 0.8484475868429142,
|
| 5052 |
+
"grad_norm": 0.3357057600160637,
|
| 5053 |
+
"learning_rate": 1.3649244234184157e-06,
|
| 5054 |
+
"loss": 0.1395,
|
| 5055 |
+
"step": 690
|
| 5056 |
+
},
|
| 5057 |
+
{
|
| 5058 |
+
"epoch": 0.8496772210267446,
|
| 5059 |
+
"grad_norm": 0.38849185683759185,
|
| 5060 |
+
"learning_rate": 1.3433295704092586e-06,
|
| 5061 |
+
"loss": 0.1367,
|
| 5062 |
+
"step": 691
|
| 5063 |
+
},
|
| 5064 |
+
{
|
| 5065 |
+
"epoch": 0.8509068552105749,
|
| 5066 |
+
"grad_norm": 0.5532934868131949,
|
| 5067 |
+
"learning_rate": 1.3218946051558867e-06,
|
| 5068 |
+
"loss": 0.2007,
|
| 5069 |
+
"step": 692
|
| 5070 |
+
},
|
| 5071 |
+
{
|
| 5072 |
+
"epoch": 0.8521364893944051,
|
| 5073 |
+
"grad_norm": 0.4093414023233572,
|
| 5074 |
+
"learning_rate": 1.3006199235597628e-06,
|
| 5075 |
+
"loss": 0.199,
|
| 5076 |
+
"step": 693
|
| 5077 |
+
},
|
| 5078 |
+
{
|
| 5079 |
+
"epoch": 0.8533661235782355,
|
| 5080 |
+
"grad_norm": 0.5800657790788337,
|
| 5081 |
+
"learning_rate": 1.279505918561923e-06,
|
| 5082 |
+
"loss": 0.1786,
|
| 5083 |
+
"step": 694
|
| 5084 |
+
},
|
| 5085 |
+
{
|
| 5086 |
+
"epoch": 0.8545957577620658,
|
| 5087 |
+
"grad_norm": 0.5604353644860381,
|
| 5088 |
+
"learning_rate": 1.2585529801357377e-06,
|
| 5089 |
+
"loss": 0.2597,
|
| 5090 |
+
"step": 695
|
| 5091 |
+
},
|
| 5092 |
+
{
|
| 5093 |
+
"epoch": 0.8558253919458961,
|
| 5094 |
+
"grad_norm": 0.4944214492031985,
|
| 5095 |
+
"learning_rate": 1.2377614952796825e-06,
|
| 5096 |
+
"loss": 0.1578,
|
| 5097 |
+
"step": 696
|
| 5098 |
+
},
|
| 5099 |
+
{
|
| 5100 |
+
"epoch": 0.8570550261297264,
|
| 5101 |
+
"grad_norm": 0.3580298395044867,
|
| 5102 |
+
"learning_rate": 1.217131848010209e-06,
|
| 5103 |
+
"loss": 0.145,
|
| 5104 |
+
"step": 697
|
| 5105 |
+
},
|
| 5106 |
+
{
|
| 5107 |
+
"epoch": 0.8582846603135568,
|
| 5108 |
+
"grad_norm": 0.49696207588289626,
|
| 5109 |
+
"learning_rate": 1.196664419354644e-06,
|
| 5110 |
+
"loss": 0.1847,
|
| 5111 |
+
"step": 698
|
| 5112 |
+
},
|
| 5113 |
+
{
|
| 5114 |
+
"epoch": 0.859514294497387,
|
| 5115 |
+
"grad_norm": 0.5676831498828142,
|
| 5116 |
+
"learning_rate": 1.176359587344158e-06,
|
| 5117 |
+
"loss": 0.2467,
|
| 5118 |
+
"step": 699
|
| 5119 |
+
},
|
| 5120 |
+
{
|
| 5121 |
+
"epoch": 0.8607439286812173,
|
| 5122 |
+
"grad_norm": 0.4791316046608471,
|
| 5123 |
+
"learning_rate": 1.1562177270067766e-06,
|
| 5124 |
+
"loss": 0.2128,
|
| 5125 |
+
"step": 700
|
| 5126 |
}
|
| 5127 |
],
|
| 5128 |
"logging_steps": 1,
|
|
|
|
| 5142 |
"attributes": {}
|
| 5143 |
}
|
| 5144 |
},
|
| 5145 |
+
"total_flos": 687762207244288.0,
|
| 5146 |
"train_batch_size": 4,
|
| 5147 |
"trial_name": null,
|
| 5148 |
"trial_params": null
|