Training in progress, step 2527, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2527/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +278 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 48680136
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8346ab13f4b7b21d2b29112b8902325a29dd3b846c062714d1a52cc8e3529039
|
| 3 |
size 48680136
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6430755e4d771a1d655da6a1122a88c06170b3283533cff22655ca0c300bc051
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28ca477b3d667f9bd120a3f5a0027056380a000cb2c785e3fe48573dfbadae75
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4143120b3fd3d0d24bbb06c958d63707e92aa4c5608c462cc256587b07bd48ae
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:099aadae436f7329944571a6f95df04b3350e34b231b01a5b18f510dcb0825d6
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d92d1db126a6c00c0911057ddc047de4f28b675d5ccd74f1c4acf86ffede2051
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bff6ba32ab439063f5a6f5d43a0235bd6ded54cc7f5ed534b621ca96385b3c0
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa6eab5768c985a15d15daec830906bf954e42edc06c6d64dfb78b6745f51c5b
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:556d2e56ffee6c65efb42cecab9b558ae4d59b452762ba18ad76882392357d22
|
| 3 |
+
size 36474352
|
last-checkpoint/global_step2527/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a899397f8e1be9eece0d2cff42a99a02b88d1d3919acf5627d8b45ec803faf9c
|
| 3 |
+
size 390451
|
last-checkpoint/global_step2527/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0db1c1c054d19eb3996ec805796d485095f52b86ec41bef01e1836f441978533
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2ebe7aef19a19af80ff1ee3b138d90ac19ded2df55cf1b4f48a7ff1bc200945
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:626ab93bae5148e7ec4008808d2d0bd4e5f6b1218e8c6fc74862d0c2eea59369
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f1e277faa2b654951b677e101231e6ef09a2f3aff4c548c70549d90dfff4993
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d86545118cda5ad95577530de8b0298285184172fe3fd4ef69b743506cd581ca
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80a94252d81267a9b39b875517331b81ed9ff36646278c02ed653bb435373d66
|
| 3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa1e7083f8aee635a411566a4eba90d63697b0c3943fef588ac35000614a4f80
|
| 3 |
+
size 390387
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2527
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c02adaeb104a456cce73f3dfce7ef82bd1d39537f6f938fef85bbda510bd32a
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f975a10d97cf0c5780ff024524346681750a4672878c083063f7246a341e0fa2
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6fb704c634e883f9e3881266a3dcb5ba23374c8ebf181812c9f96a130e61f66
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bc646933e05a0c4234556cd6f0c7075bab2d33a5950ff4bbe972166c1092320
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b495a6c499c1edc6102a33cf75929a4f32fe1644e38ef1d57d162fb64bfb915f
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f8a3798560394c1c82cce7ebec75141fc923441919268c7c53e22a06fb17656
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b1aedf073e7fd48123a8e0decc6dd465a3bcde85b1242e59856358cf0587a7d
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26c6ae9753603f5ca51b6367b59f3afc788d1cceb974eb84386f65a6283806de
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba1417a4512f0520215fff01c82d65455bbbf8bb1817ff1d5e6b2e4fc2d3773d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 5,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -26024,6 +26024,280 @@
|
|
| 26024 |
"eval_samples_per_second": 6.589,
|
| 26025 |
"eval_steps_per_second": 0.22,
|
| 26026 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26027 |
}
|
| 26028 |
],
|
| 26029 |
"logging_steps": 1,
|
|
@@ -26038,12 +26312,12 @@
|
|
| 26038 |
"should_evaluate": false,
|
| 26039 |
"should_log": false,
|
| 26040 |
"should_save": true,
|
| 26041 |
-
"should_training_stop":
|
| 26042 |
},
|
| 26043 |
"attributes": {}
|
| 26044 |
}
|
| 26045 |
},
|
| 26046 |
-
"total_flos":
|
| 26047 |
"train_batch_size": 2,
|
| 26048 |
"trial_name": null,
|
| 26049 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
"eval_steps": 5,
|
| 6 |
+
"global_step": 2527,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 26024 |
"eval_samples_per_second": 6.589,
|
| 26025 |
"eval_steps_per_second": 0.22,
|
| 26026 |
"step": 2500
|
| 26027 |
+
},
|
| 26028 |
+
{
|
| 26029 |
+
"epoch": 0.9897111199050257,
|
| 26030 |
+
"grad_norm": 0.7640975936543402,
|
| 26031 |
+
"learning_rate": 3.225216210623327e-08,
|
| 26032 |
+
"loss": 0.1595,
|
| 26033 |
+
"step": 2501
|
| 26034 |
+
},
|
| 26035 |
+
{
|
| 26036 |
+
"epoch": 0.9901068460625247,
|
| 26037 |
+
"grad_norm": 0.6935968775183601,
|
| 26038 |
+
"learning_rate": 2.981917874453344e-08,
|
| 26039 |
+
"loss": 0.147,
|
| 26040 |
+
"step": 2502
|
| 26041 |
+
},
|
| 26042 |
+
{
|
| 26043 |
+
"epoch": 0.9905025722200237,
|
| 26044 |
+
"grad_norm": 0.671927431715776,
|
| 26045 |
+
"learning_rate": 2.7481569312381995e-08,
|
| 26046 |
+
"loss": 0.0938,
|
| 26047 |
+
"step": 2503
|
| 26048 |
+
},
|
| 26049 |
+
{
|
| 26050 |
+
"epoch": 0.9908982983775227,
|
| 26051 |
+
"grad_norm": 0.7349943492731037,
|
| 26052 |
+
"learning_rate": 2.52393382713767e-08,
|
| 26053 |
+
"loss": 0.1265,
|
| 26054 |
+
"step": 2504
|
| 26055 |
+
},
|
| 26056 |
+
{
|
| 26057 |
+
"epoch": 0.9912940245350218,
|
| 26058 |
+
"grad_norm": 0.5079188246341098,
|
| 26059 |
+
"learning_rate": 2.3092489901083148e-08,
|
| 26060 |
+
"loss": 0.1685,
|
| 26061 |
+
"step": 2505
|
| 26062 |
+
},
|
| 26063 |
+
{
|
| 26064 |
+
"epoch": 0.9912940245350218,
|
| 26065 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
| 26066 |
+
"eval_PRM F1": 0.9464285714285714,
|
| 26067 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
| 26068 |
+
"eval_PRM F1 AUC (fixed)": 0.8566158386620757,
|
| 26069 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
| 26070 |
+
"eval_PRM NPV": 0.8888888888888888,
|
| 26071 |
+
"eval_PRM Precision": 0.905982905982906,
|
| 26072 |
+
"eval_PRM Recall": 0.9906542056074766,
|
| 26073 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
| 26074 |
+
"eval_loss": 0.3131347596645355,
|
| 26075 |
+
"eval_runtime": 4.2821,
|
| 26076 |
+
"eval_samples_per_second": 7.006,
|
| 26077 |
+
"eval_steps_per_second": 0.234,
|
| 26078 |
+
"step": 2505
|
| 26079 |
+
},
|
| 26080 |
+
{
|
| 26081 |
+
"epoch": 0.9916897506925207,
|
| 26082 |
+
"grad_norm": 0.7550093866508485,
|
| 26083 |
+
"learning_rate": 2.1041028299012555e-08,
|
| 26084 |
+
"loss": 0.1852,
|
| 26085 |
+
"step": 2506
|
| 26086 |
+
},
|
| 26087 |
+
{
|
| 26088 |
+
"epoch": 0.9920854768500198,
|
| 26089 |
+
"grad_norm": 0.84761330923606,
|
| 26090 |
+
"learning_rate": 1.908495738061067e-08,
|
| 26091 |
+
"loss": 0.15,
|
| 26092 |
+
"step": 2507
|
| 26093 |
+
},
|
| 26094 |
+
{
|
| 26095 |
+
"epoch": 0.9924812030075187,
|
| 26096 |
+
"grad_norm": 0.5014629514556076,
|
| 26097 |
+
"learning_rate": 1.7224280879279964e-08,
|
| 26098 |
+
"loss": 0.1174,
|
| 26099 |
+
"step": 2508
|
| 26100 |
+
},
|
| 26101 |
+
{
|
| 26102 |
+
"epoch": 0.9928769291650178,
|
| 26103 |
+
"grad_norm": 0.7739608773483745,
|
| 26104 |
+
"learning_rate": 1.5459002346324135e-08,
|
| 26105 |
+
"loss": 0.1267,
|
| 26106 |
+
"step": 2509
|
| 26107 |
+
},
|
| 26108 |
+
{
|
| 26109 |
+
"epoch": 0.9932726553225169,
|
| 26110 |
+
"grad_norm": 0.6174389258151092,
|
| 26111 |
+
"learning_rate": 1.3789125150998061e-08,
|
| 26112 |
+
"loss": 0.1471,
|
| 26113 |
+
"step": 2510
|
| 26114 |
+
},
|
| 26115 |
+
{
|
| 26116 |
+
"epoch": 0.9932726553225169,
|
| 26117 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
| 26118 |
+
"eval_PRM F1": 0.9464285714285714,
|
| 26119 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
| 26120 |
+
"eval_PRM F1 AUC (fixed)": 0.8541564190850959,
|
| 26121 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
| 26122 |
+
"eval_PRM NPV": 0.8888888888888888,
|
| 26123 |
+
"eval_PRM Precision": 0.905982905982906,
|
| 26124 |
+
"eval_PRM Recall": 0.9906542056074766,
|
| 26125 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
| 26126 |
+
"eval_loss": 0.3146809935569763,
|
| 26127 |
+
"eval_runtime": 5.0007,
|
| 26128 |
+
"eval_samples_per_second": 5.999,
|
| 26129 |
+
"eval_steps_per_second": 0.2,
|
| 26130 |
+
"step": 2510
|
| 26131 |
+
},
|
| 26132 |
+
{
|
| 26133 |
+
"epoch": 0.9936683814800158,
|
| 26134 |
+
"grad_norm": 0.8881221027776471,
|
| 26135 |
+
"learning_rate": 1.2214652480452282e-08,
|
| 26136 |
+
"loss": 0.1881,
|
| 26137 |
+
"step": 2511
|
| 26138 |
+
},
|
| 26139 |
+
{
|
| 26140 |
+
"epoch": 0.9940641076375148,
|
| 26141 |
+
"grad_norm": 0.6711036576482892,
|
| 26142 |
+
"learning_rate": 1.0735587339749665e-08,
|
| 26143 |
+
"loss": 0.1136,
|
| 26144 |
+
"step": 2512
|
| 26145 |
+
},
|
| 26146 |
+
{
|
| 26147 |
+
"epoch": 0.9944598337950139,
|
| 26148 |
+
"grad_norm": 0.7811317531255666,
|
| 26149 |
+
"learning_rate": 9.351932551854292e-09,
|
| 26150 |
+
"loss": 0.1778,
|
| 26151 |
+
"step": 2513
|
| 26152 |
+
},
|
| 26153 |
+
{
|
| 26154 |
+
"epoch": 0.9948555599525128,
|
| 26155 |
+
"grad_norm": 0.8843034018155372,
|
| 26156 |
+
"learning_rate": 8.063690757642572e-09,
|
| 26157 |
+
"loss": 0.2367,
|
| 26158 |
+
"step": 2514
|
| 26159 |
+
},
|
| 26160 |
+
{
|
| 26161 |
+
"epoch": 0.9952512861100119,
|
| 26162 |
+
"grad_norm": 0.8913039576623297,
|
| 26163 |
+
"learning_rate": 6.8708644158754775e-09,
|
| 26164 |
+
"loss": 0.1645,
|
| 26165 |
+
"step": 2515
|
| 26166 |
+
},
|
| 26167 |
+
{
|
| 26168 |
+
"epoch": 0.9952512861100119,
|
| 26169 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
| 26170 |
+
"eval_PRM F1": 0.9464285714285714,
|
| 26171 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
| 26172 |
+
"eval_PRM F1 AUC (fixed)": 0.8553861288735858,
|
| 26173 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
| 26174 |
+
"eval_PRM NPV": 0.8888888888888888,
|
| 26175 |
+
"eval_PRM Precision": 0.905982905982906,
|
| 26176 |
+
"eval_PRM Recall": 0.9906542056074766,
|
| 26177 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
| 26178 |
+
"eval_loss": 0.3146321475505829,
|
| 26179 |
+
"eval_runtime": 4.3747,
|
| 26180 |
+
"eval_samples_per_second": 6.858,
|
| 26181 |
+
"eval_steps_per_second": 0.229,
|
| 26182 |
+
"step": 2515
|
| 26183 |
+
},
|
| 26184 |
+
{
|
| 26185 |
+
"epoch": 0.9956470122675108,
|
| 26186 |
+
"grad_norm": 0.6541800097659761,
|
| 26187 |
+
"learning_rate": 5.773455803187444e-09,
|
| 26188 |
+
"loss": 0.1339,
|
| 26189 |
+
"step": 2516
|
| 26190 |
+
},
|
| 26191 |
+
{
|
| 26192 |
+
"epoch": 0.9960427384250099,
|
| 26193 |
+
"grad_norm": 0.5875809598684076,
|
| 26194 |
+
"learning_rate": 4.771467014125231e-09,
|
| 26195 |
+
"loss": 0.1672,
|
| 26196 |
+
"step": 2517
|
| 26197 |
+
},
|
| 26198 |
+
{
|
| 26199 |
+
"epoch": 0.996438464582509,
|
| 26200 |
+
"grad_norm": 0.7159822425927463,
|
| 26201 |
+
"learning_rate": 3.864899961097956e-09,
|
| 26202 |
+
"loss": 0.1658,
|
| 26203 |
+
"step": 2518
|
| 26204 |
+
},
|
| 26205 |
+
{
|
| 26206 |
+
"epoch": 0.9968341907400079,
|
| 26207 |
+
"grad_norm": 1.0406525491292937,
|
| 26208 |
+
"learning_rate": 3.053756374393757e-09,
|
| 26209 |
+
"loss": 0.2251,
|
| 26210 |
+
"step": 2519
|
| 26211 |
+
},
|
| 26212 |
+
{
|
| 26213 |
+
"epoch": 0.997229916897507,
|
| 26214 |
+
"grad_norm": 0.6871265678261859,
|
| 26215 |
+
"learning_rate": 2.338037802174231e-09,
|
| 26216 |
+
"loss": 0.1341,
|
| 26217 |
+
"step": 2520
|
| 26218 |
+
},
|
| 26219 |
+
{
|
| 26220 |
+
"epoch": 0.997229916897507,
|
| 26221 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
| 26222 |
+
"eval_PRM F1": 0.9464285714285714,
|
| 26223 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
| 26224 |
+
"eval_PRM F1 AUC (fixed)": 0.8566158386620757,
|
| 26225 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
| 26226 |
+
"eval_PRM NPV": 0.8888888888888888,
|
| 26227 |
+
"eval_PRM Precision": 0.905982905982906,
|
| 26228 |
+
"eval_PRM Recall": 0.9906542056074766,
|
| 26229 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
| 26230 |
+
"eval_loss": 0.31285807490348816,
|
| 26231 |
+
"eval_runtime": 4.6229,
|
| 26232 |
+
"eval_samples_per_second": 6.489,
|
| 26233 |
+
"eval_steps_per_second": 0.216,
|
| 26234 |
+
"step": 2520
|
| 26235 |
+
},
|
| 26236 |
+
{
|
| 26237 |
+
"epoch": 0.9976256430550059,
|
| 26238 |
+
"grad_norm": 0.6838195939717506,
|
| 26239 |
+
"learning_rate": 1.7177456104688905e-09,
|
| 26240 |
+
"loss": 0.185,
|
| 26241 |
+
"step": 2521
|
| 26242 |
+
},
|
| 26243 |
+
{
|
| 26244 |
+
"epoch": 0.998021369212505,
|
| 26245 |
+
"grad_norm": 0.9441501978794823,
|
| 26246 |
+
"learning_rate": 1.1928809831807108e-09,
|
| 26247 |
+
"loss": 0.2185,
|
| 26248 |
+
"step": 2522
|
| 26249 |
+
},
|
| 26250 |
+
{
|
| 26251 |
+
"epoch": 0.998417095370004,
|
| 26252 |
+
"grad_norm": 0.6337380468158322,
|
| 26253 |
+
"learning_rate": 7.634449220805806e-10,
|
| 26254 |
+
"loss": 0.1319,
|
| 26255 |
+
"step": 2523
|
| 26256 |
+
},
|
| 26257 |
+
{
|
| 26258 |
+
"epoch": 0.998812821527503,
|
| 26259 |
+
"grad_norm": 0.48889023343641086,
|
| 26260 |
+
"learning_rate": 4.294382467906477e-10,
|
| 26261 |
+
"loss": 0.11,
|
| 26262 |
+
"step": 2524
|
| 26263 |
+
},
|
| 26264 |
+
{
|
| 26265 |
+
"epoch": 0.999208547685002,
|
| 26266 |
+
"grad_norm": 0.898426655651998,
|
| 26267 |
+
"learning_rate": 1.9086159480097287e-10,
|
| 26268 |
+
"loss": 0.1946,
|
| 26269 |
+
"step": 2525
|
| 26270 |
+
},
|
| 26271 |
+
{
|
| 26272 |
+
"epoch": 0.999208547685002,
|
| 26273 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
| 26274 |
+
"eval_PRM F1": 0.9464285714285714,
|
| 26275 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
| 26276 |
+
"eval_PRM F1 AUC (fixed)": 0.854402361042794,
|
| 26277 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
| 26278 |
+
"eval_PRM NPV": 0.8888888888888888,
|
| 26279 |
+
"eval_PRM Precision": 0.905982905982906,
|
| 26280 |
+
"eval_PRM Recall": 0.9906542056074766,
|
| 26281 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
| 26282 |
+
"eval_loss": 0.3133951723575592,
|
| 26283 |
+
"eval_runtime": 4.4087,
|
| 26284 |
+
"eval_samples_per_second": 6.805,
|
| 26285 |
+
"eval_steps_per_second": 0.227,
|
| 26286 |
+
"step": 2525
|
| 26287 |
+
},
|
| 26288 |
+
{
|
| 26289 |
+
"epoch": 0.999604273842501,
|
| 26290 |
+
"grad_norm": 0.6412635302679868,
|
| 26291 |
+
"learning_rate": 4.771542146952967e-11,
|
| 26292 |
+
"loss": 0.1794,
|
| 26293 |
+
"step": 2526
|
| 26294 |
+
},
|
| 26295 |
+
{
|
| 26296 |
+
"epoch": 1.0,
|
| 26297 |
+
"grad_norm": 0.7990569727457182,
|
| 26298 |
+
"learning_rate": 0.0,
|
| 26299 |
+
"loss": 0.1372,
|
| 26300 |
+
"step": 2527
|
| 26301 |
}
|
| 26302 |
],
|
| 26303 |
"logging_steps": 1,
|
|
|
|
| 26312 |
"should_evaluate": false,
|
| 26313 |
"should_log": false,
|
| 26314 |
"should_save": true,
|
| 26315 |
+
"should_training_stop": true
|
| 26316 |
},
|
| 26317 |
"attributes": {}
|
| 26318 |
}
|
| 26319 |
},
|
| 26320 |
+
"total_flos": 2549072128245760.0,
|
| 26321 |
"train_batch_size": 2,
|
| 26322 |
"trial_name": null,
|
| 26323 |
"trial_params": null
|