Daewon0808 commited on
Commit
7b2ef68
·
verified ·
1 Parent(s): 782c576

Training in progress, step 2096, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step2096/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2096/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2096/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2096/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2096/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2096/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2096/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2096/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step2096/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2096/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2096/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2096/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2096/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2096/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2096/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step2096/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +999 -4
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6858b7cacf8da273466148c63ce37c609bc72964df895f364ba999052d67cb02
3
  size 80792880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a2ebef4c2fa62ef083acf0283325484beb150e65ae9598b4f8e6997ab30d55
3
  size 80792880
last-checkpoint/global_step2096/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab09b5694eefe7786e712dcf5ef338807f00cac67b1a8440d22511c7e4545758
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9160ed484219503a480434fbf574571e9d5f1d720b21c5e2a0b9dfb85dd935
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5109c80b3b772cfe133a4e232d0b1f6517a21abd3ce07abeb99f9168a5fcbfc0
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f0652215971f9411505e7ff6276df7910126de6633c2aaf02ff448702d5509
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05414cb558c1e89c7599d8b422c0e345864d88b77a36f695e9914d1d15c0df3b
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31a7547d2d3244cb3bbc140314965514ad539d6f70c7faa1acb1900a4a9aac9d
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f70b71f3f411d80968f7911ca95e802eea151f860d81e428b05f518e6e4adb9
3
+ size 60558832
last-checkpoint/global_step2096/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabbe1238e434b478f60b7683cb4ad2201bf72030aad2b56611a0d100ad4180f
3
+ size 60558832
last-checkpoint/global_step2096/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56be51a75f861a6c1061df855e1c773b43908a82058ceae617ede592760334b
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba77dce12ffc36f83474659416adc00392fd3cc22aa4809c0fc78cd41a6ee70
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ebb449d60f3d1684c4ffe693dbb7576d2a14869d26db198000fa7cc2a248216
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae9aea2a7e808c932e611a4588fb8843e462e2bf40d55f8736a53bb22223393
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c16c282fc532c92df24aae3e94d6a192f31d3d3a9dc48bcdff32f0b9c34e9d
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fc633928076b503a85f121de6cc6f5cc0f95ff8e1b0a1d15ed02a5b870bed57
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1b9d4e33cc40d6590ad04654584253598b7503359e7747e703703a00d5e1c57
3
+ size 390038
last-checkpoint/global_step2096/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c6de32f80e510731cf621e564d4fbaee82e9d81f5a5e02e37e05a8a81c6fa6
3
+ size 390038
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2000
 
1
+ global_step2096
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cc5c0f1dd8463e5c58f14317bdebb3c5833d534b938627ce207723dd0b7e7f8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3db20ad821c0bd6ef3820f3ee9a15e66a9f5ac2f7ddd41a44dca6d3296d1ee8d
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:214889c1db734b7da2083b307acb5ad3efffdf3efd538ba8a9398c61c9d624a6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b3f45ae1458240bdc90980406976b3d3415ee685fe0bbc0c9bd438b38d4cc3
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b3661b41e96ef0c059587fa2433b349cb391639e7cc40b210b54dd421754a42
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4468d58f1c2f91dc50e131aecf9444c837c55f89d91de410985021a52c7b9d3c
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a00a0cf3269503efee297bfaa4a0177630fe8d2ab36cdc010884706c6927e2b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:653ffb9d097b24cf1819a32c53fd01044f4b6df3a39d0d1a41f49d567234e621
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a077c3964abf442ddbb13bd61d8f22de1f48735ef155591994eb411ae01e53a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e56dc24ba4e2baf45c62e54eda0982ce52c5db9637972a21bf3ccebc89a7bf
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a15004e563ff52373d189f7cc7ee16f283f740197d25f3a08a8fc58e0691c661
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9307ebbf541138419e4ea948891c68bec3ee50264969db8d7ddd9e305b6fae68
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0acbe321c626ff5100b5ee5e3eb751e22685631c6f9939d317f95d717a87875e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8056cfa8e1113bd740eada2f62b9f13713a8fcff8af18eff52163e7abdf8d26e
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e0664b3713d71bc66950e282d440ee23a20cdb6e0d052e4fedc92caf00a354
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f63a80cbfc65d36cb0a6c908bb11d8b46ac29771d0b22925818162c7defb651
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f4da5e5d48f3657b3469ffe99496799aa1e0c96ebacbde9407ba10230a28ebf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43f1b551d131f2e15824dafdf308fab750fa72bbedcb53092f970029438269d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9539709038874314,
5
  "eval_steps": 5,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -20824,6 +20824,1001 @@
20824
  "eval_samples_per_second": 3.243,
20825
  "eval_steps_per_second": 0.147,
20826
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20827
  }
20828
  ],
20829
  "logging_steps": 1,
@@ -20838,12 +21833,12 @@
20838
  "should_evaluate": false,
20839
  "should_log": false,
20840
  "should_save": true,
20841
- "should_training_stop": false
20842
  },
20843
  "attributes": {}
20844
  }
20845
  },
20846
- "total_flos": 2290427323154432.0,
20847
  "train_batch_size": 2,
20848
  "trial_name": null,
20849
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9997615072740281,
5
  "eval_steps": 5,
6
+ "global_step": 2096,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
20824
  "eval_samples_per_second": 3.243,
20825
  "eval_steps_per_second": 0.147,
20826
  "step": 2000
20827
+ },
20828
+ {
20829
+ "epoch": 0.9544478893393752,
20830
+ "grad_norm": 0.46363001931995473,
20831
+ "learning_rate": 6.247368302631396e-07,
20832
+ "loss": 0.1768,
20833
+ "step": 2001
20834
+ },
20835
+ {
20836
+ "epoch": 0.9549248747913188,
20837
+ "grad_norm": 0.5202493640608246,
20838
+ "learning_rate": 6.116804407097165e-07,
20839
+ "loss": 0.2037,
20840
+ "step": 2002
20841
+ },
20842
+ {
20843
+ "epoch": 0.9554018602432626,
20844
+ "grad_norm": 0.5187833918929314,
20845
+ "learning_rate": 5.987610890004159e-07,
20846
+ "loss": 0.2036,
20847
+ "step": 2003
20848
+ },
20849
+ {
20850
+ "epoch": 0.9558788456952063,
20851
+ "grad_norm": 0.5920049724547812,
20852
+ "learning_rate": 5.859788109825793e-07,
20853
+ "loss": 0.2256,
20854
+ "step": 2004
20855
+ },
20856
+ {
20857
+ "epoch": 0.9563558311471501,
20858
+ "grad_norm": 0.49628985797291797,
20859
+ "learning_rate": 5.733336421232027e-07,
20860
+ "loss": 0.1671,
20861
+ "step": 2005
20862
+ },
20863
+ {
20864
+ "epoch": 0.9563558311471501,
20865
+ "eval_PRM Accuracy": 0.9117647058823529,
20866
+ "eval_PRM F1": 0.9387755102040817,
20867
+ "eval_PRM F1 AUC": 0.8863958431743032,
20868
+ "eval_PRM F1 AUC (fixed)": 0.9740198393953707,
20869
+ "eval_PRM F1 Neg": 0.8421052631578947,
20870
+ "eval_PRM NPV": 0.8571428571428571,
20871
+ "eval_PRM Precision": 0.9324324324324325,
20872
+ "eval_PRM Recall": 0.9452054794520548,
20873
+ "eval_PRM Specificty": 0.8275862068965517,
20874
+ "eval_loss": 0.1853582262992859,
20875
+ "eval_runtime": 9.4911,
20876
+ "eval_samples_per_second": 4.636,
20877
+ "eval_steps_per_second": 0.211,
20878
+ "step": 2005
20879
+ },
20880
+ {
20881
+ "epoch": 0.9568328165990937,
20882
+ "grad_norm": 0.46054756457888824,
20883
+ "learning_rate": 5.608256175088699e-07,
20884
+ "loss": 0.1388,
20885
+ "step": 2006
20886
+ },
20887
+ {
20888
+ "epoch": 0.9573098020510374,
20889
+ "grad_norm": 0.6037004635730433,
20890
+ "learning_rate": 5.484547718456024e-07,
20891
+ "loss": 0.1962,
20892
+ "step": 2007
20893
+ },
20894
+ {
20895
+ "epoch": 0.9577867875029812,
20896
+ "grad_norm": 0.7908605629178659,
20897
+ "learning_rate": 5.362211394588201e-07,
20898
+ "loss": 0.2143,
20899
+ "step": 2008
20900
+ },
20901
+ {
20902
+ "epoch": 0.9582637729549248,
20903
+ "grad_norm": 1.2406054827420188,
20904
+ "learning_rate": 5.241247542931815e-07,
20905
+ "loss": 0.3142,
20906
+ "step": 2009
20907
+ },
20908
+ {
20909
+ "epoch": 0.9587407584068686,
20910
+ "grad_norm": 0.4741604532893723,
20911
+ "learning_rate": 5.121656499125604e-07,
20912
+ "loss": 0.202,
20913
+ "step": 2010
20914
+ },
20915
+ {
20916
+ "epoch": 0.9587407584068686,
20917
+ "eval_PRM Accuracy": 0.9215686274509803,
20918
+ "eval_PRM F1": 0.9452054794520548,
20919
+ "eval_PRM F1 AUC": 0.9036372224846481,
20920
+ "eval_PRM F1 AUC (fixed)": 0.9730751062824752,
20921
+ "eval_PRM F1 Neg": 0.8620689655172413,
20922
+ "eval_PRM NPV": 0.8620689655172413,
20923
+ "eval_PRM Precision": 0.9452054794520548,
20924
+ "eval_PRM Recall": 0.9452054794520548,
20925
+ "eval_PRM Specificty": 0.8620689655172413,
20926
+ "eval_loss": 0.1857965588569641,
20927
+ "eval_runtime": 14.288,
20928
+ "eval_samples_per_second": 3.08,
20929
+ "eval_steps_per_second": 0.14,
20930
+ "step": 2010
20931
+ },
20932
+ {
20933
+ "epoch": 0.9592177438588123,
20934
+ "grad_norm": 0.5934312404017598,
20935
+ "learning_rate": 5.003438594998911e-07,
20936
+ "loss": 0.2037,
20937
+ "step": 2011
20938
+ },
20939
+ {
20940
+ "epoch": 0.9596947293107561,
20941
+ "grad_norm": 0.5667813315735417,
20942
+ "learning_rate": 4.88659415857129e-07,
20943
+ "loss": 0.2542,
20944
+ "step": 2012
20945
+ },
20946
+ {
20947
+ "epoch": 0.9601717147626997,
20948
+ "grad_norm": 0.6066873845783456,
20949
+ "learning_rate": 4.771123514050957e-07,
20950
+ "loss": 0.1754,
20951
+ "step": 2013
20952
+ },
20953
+ {
20954
+ "epoch": 0.9606487002146434,
20955
+ "grad_norm": 0.45844057179588416,
20956
+ "learning_rate": 4.6570269818346224e-07,
20957
+ "loss": 0.1702,
20958
+ "step": 2014
20959
+ },
20960
+ {
20961
+ "epoch": 0.9611256856665872,
20962
+ "grad_norm": 0.8627638637774376,
20963
+ "learning_rate": 4.5443048785061007e-07,
20964
+ "loss": 0.1902,
20965
+ "step": 2015
20966
+ },
20967
+ {
20968
+ "epoch": 0.9611256856665872,
20969
+ "eval_PRM Accuracy": 0.9117647058823529,
20970
+ "eval_PRM F1": 0.9387755102040817,
20971
+ "eval_PRM F1 AUC": 0.8863958431743032,
20972
+ "eval_PRM F1 AUC (fixed)": 0.9742560226735947,
20973
+ "eval_PRM F1 Neg": 0.8421052631578947,
20974
+ "eval_PRM NPV": 0.8571428571428571,
20975
+ "eval_PRM Precision": 0.9324324324324325,
20976
+ "eval_PRM Recall": 0.9452054794520548,
20977
+ "eval_PRM Specificty": 0.8275862068965517,
20978
+ "eval_loss": 0.1854802966117859,
20979
+ "eval_runtime": 13.5861,
20980
+ "eval_samples_per_second": 3.239,
20981
+ "eval_steps_per_second": 0.147,
20982
+ "step": 2015
20983
+ },
20984
+ {
20985
+ "epoch": 0.9616026711185309,
20986
+ "grad_norm": 0.5948796848880178,
20987
+ "learning_rate": 4.432957516835534e-07,
20988
+ "loss": 0.1965,
20989
+ "step": 2016
20990
+ },
20991
+ {
20992
+ "epoch": 0.9620796565704746,
20993
+ "grad_norm": 0.6019788555452265,
20994
+ "learning_rate": 4.3229852057785626e-07,
20995
+ "loss": 0.2276,
20996
+ "step": 2017
20997
+ },
20998
+ {
20999
+ "epoch": 0.9625566420224183,
21000
+ "grad_norm": 0.4742278466628085,
21001
+ "learning_rate": 4.2143882504756537e-07,
21002
+ "loss": 0.1158,
21003
+ "step": 2018
21004
+ },
21005
+ {
21006
+ "epoch": 0.963033627474362,
21007
+ "grad_norm": 0.6747526262962028,
21008
+ "learning_rate": 4.107166952250885e-07,
21009
+ "loss": 0.1962,
21010
+ "step": 2019
21011
+ },
21012
+ {
21013
+ "epoch": 0.9635106129263058,
21014
+ "grad_norm": 0.6246184175068561,
21015
+ "learning_rate": 4.0013216086113305e-07,
21016
+ "loss": 0.168,
21017
+ "step": 2020
21018
+ },
21019
+ {
21020
+ "epoch": 0.9635106129263058,
21021
+ "eval_PRM Accuracy": 0.9117647058823529,
21022
+ "eval_PRM F1": 0.9387755102040817,
21023
+ "eval_PRM F1 AUC": 0.8863958431743032,
21024
+ "eval_PRM F1 AUC (fixed)": 0.9744922059518186,
21025
+ "eval_PRM F1 Neg": 0.8421052631578947,
21026
+ "eval_PRM NPV": 0.8571428571428571,
21027
+ "eval_PRM Precision": 0.9324324324324325,
21028
+ "eval_PRM Recall": 0.9452054794520548,
21029
+ "eval_PRM Specificty": 0.8275862068965517,
21030
+ "eval_loss": 0.18552468717098236,
21031
+ "eval_runtime": 14.5016,
21032
+ "eval_samples_per_second": 3.034,
21033
+ "eval_steps_per_second": 0.138,
21034
+ "step": 2020
21035
+ },
21036
+ {
21037
+ "epoch": 0.9639875983782494,
21038
+ "grad_norm": 0.47683381284661164,
21039
+ "learning_rate": 3.8968525132462296e-07,
21040
+ "loss": 0.157,
21041
+ "step": 2021
21042
+ },
21043
+ {
21044
+ "epoch": 0.9644645838301932,
21045
+ "grad_norm": 0.6119940362045991,
21046
+ "learning_rate": 3.793759956026099e-07,
21047
+ "loss": 0.1978,
21048
+ "step": 2022
21049
+ },
21050
+ {
21051
+ "epoch": 0.9649415692821369,
21052
+ "grad_norm": 0.6456948862949443,
21053
+ "learning_rate": 3.692044223002178e-07,
21054
+ "loss": 0.2491,
21055
+ "step": 2023
21056
+ },
21057
+ {
21058
+ "epoch": 0.9654185547340806,
21059
+ "grad_norm": 0.4486105863495388,
21060
+ "learning_rate": 3.5917055964050373e-07,
21061
+ "loss": 0.1459,
21062
+ "step": 2024
21063
+ },
21064
+ {
21065
+ "epoch": 0.9658955401860243,
21066
+ "grad_norm": 0.9606332285377831,
21067
+ "learning_rate": 3.4927443546445283e-07,
21068
+ "loss": 0.256,
21069
+ "step": 2025
21070
+ },
21071
+ {
21072
+ "epoch": 0.9658955401860243,
21073
+ "eval_PRM Accuracy": 0.9117647058823529,
21074
+ "eval_PRM F1": 0.9387755102040817,
21075
+ "eval_PRM F1 AUC": 0.8863958431743032,
21076
+ "eval_PRM F1 AUC (fixed)": 0.9749645725082665,
21077
+ "eval_PRM F1 Neg": 0.8421052631578947,
21078
+ "eval_PRM NPV": 0.8571428571428571,
21079
+ "eval_PRM Precision": 0.9324324324324325,
21080
+ "eval_PRM Recall": 0.9452054794520548,
21081
+ "eval_PRM Specificty": 0.8275862068965517,
21082
+ "eval_loss": 0.1847589612007141,
21083
+ "eval_runtime": 13.8736,
21084
+ "eval_samples_per_second": 3.171,
21085
+ "eval_steps_per_second": 0.144,
21086
+ "step": 2025
21087
+ },
21088
+ {
21089
+ "epoch": 0.966372525637968,
21090
+ "grad_norm": 0.49145718044168607,
21091
+ "learning_rate": 3.395160772308559e-07,
21092
+ "loss": 0.1963,
21093
+ "step": 2026
21094
+ },
21095
+ {
21096
+ "epoch": 0.9668495110899118,
21097
+ "grad_norm": 0.46304627488343814,
21098
+ "learning_rate": 3.2989551201624835e-07,
21099
+ "loss": 0.1743,
21100
+ "step": 2027
21101
+ },
21102
+ {
21103
+ "epoch": 0.9673264965418554,
21104
+ "grad_norm": 0.4999044809958458,
21105
+ "learning_rate": 3.20412766514816e-07,
21106
+ "loss": 0.2058,
21107
+ "step": 2028
21108
+ },
21109
+ {
21110
+ "epoch": 0.9678034819937992,
21111
+ "grad_norm": 0.6597864406972287,
21112
+ "learning_rate": 3.11067867038356e-07,
21113
+ "loss": 0.2058,
21114
+ "step": 2029
21115
+ },
21116
+ {
21117
+ "epoch": 0.9682804674457429,
21118
+ "grad_norm": 0.7226947390131475,
21119
+ "learning_rate": 3.0186083951616595e-07,
21120
+ "loss": 0.2703,
21121
+ "step": 2030
21122
+ },
21123
+ {
21124
+ "epoch": 0.9682804674457429,
21125
+ "eval_PRM Accuracy": 0.9215686274509803,
21126
+ "eval_PRM F1": 0.9452054794520548,
21127
+ "eval_PRM F1 AUC": 0.9036372224846481,
21128
+ "eval_PRM F1 AUC (fixed)": 0.9737836561171469,
21129
+ "eval_PRM F1 Neg": 0.8620689655172413,
21130
+ "eval_PRM NPV": 0.8620689655172413,
21131
+ "eval_PRM Precision": 0.9452054794520548,
21132
+ "eval_PRM Recall": 0.9452054794520548,
21133
+ "eval_PRM Specificty": 0.8620689655172413,
21134
+ "eval_loss": 0.1856023669242859,
21135
+ "eval_runtime": 14.3153,
21136
+ "eval_samples_per_second": 3.074,
21137
+ "eval_steps_per_second": 0.14,
21138
+ "step": 2030
21139
+ },
21140
+ {
21141
+ "epoch": 0.9687574528976867,
21142
+ "grad_norm": 0.7232729910764847,
21143
+ "learning_rate": 2.927917094950106e-07,
21144
+ "loss": 0.205,
21145
+ "step": 2031
21146
+ },
21147
+ {
21148
+ "epoch": 0.9692344383496303,
21149
+ "grad_norm": 0.5531092451895251,
21150
+ "learning_rate": 2.838605021390106e-07,
21151
+ "loss": 0.1434,
21152
+ "step": 2032
21153
+ },
21154
+ {
21155
+ "epoch": 0.969711423801574,
21156
+ "grad_norm": 0.4689768889718397,
21157
+ "learning_rate": 2.7506724222959854e-07,
21158
+ "loss": 0.1638,
21159
+ "step": 2033
21160
+ },
21161
+ {
21162
+ "epoch": 0.9701884092535178,
21163
+ "grad_norm": 0.6236593320179458,
21164
+ "learning_rate": 2.6641195416545196e-07,
21165
+ "loss": 0.2312,
21166
+ "step": 2034
21167
+ },
21168
+ {
21169
+ "epoch": 0.9706653947054615,
21170
+ "grad_norm": 0.7551093880824219,
21171
+ "learning_rate": 2.5789466196241007e-07,
21172
+ "loss": 0.2462,
21173
+ "step": 2035
21174
+ },
21175
+ {
21176
+ "epoch": 0.9706653947054615,
21177
+ "eval_PRM Accuracy": 0.9117647058823529,
21178
+ "eval_PRM F1": 0.9387755102040817,
21179
+ "eval_PRM F1 AUC": 0.8863958431743032,
21180
+ "eval_PRM F1 AUC (fixed)": 0.9728389230042513,
21181
+ "eval_PRM F1 Neg": 0.8421052631578947,
21182
+ "eval_PRM NPV": 0.8571428571428571,
21183
+ "eval_PRM Precision": 0.9324324324324325,
21184
+ "eval_PRM Recall": 0.9452054794520548,
21185
+ "eval_PRM Specificty": 0.8275862068965517,
21186
+ "eval_loss": 0.18592418730258942,
21187
+ "eval_runtime": 13.4713,
21188
+ "eval_samples_per_second": 3.266,
21189
+ "eval_steps_per_second": 0.148,
21190
+ "step": 2035
21191
+ },
21192
+ {
21193
+ "epoch": 0.9711423801574052,
21194
+ "grad_norm": 0.39148638193092083,
21195
+ "learning_rate": 2.49515389253413e-07,
21196
+ "loss": 0.1402,
21197
+ "step": 2036
21198
+ },
21199
+ {
21200
+ "epoch": 0.9716193656093489,
21201
+ "grad_norm": 0.6837049878509137,
21202
+ "learning_rate": 2.41274159288446e-07,
21203
+ "loss": 0.2142,
21204
+ "step": 2037
21205
+ },
21206
+ {
21207
+ "epoch": 0.9720963510612927,
21208
+ "grad_norm": 0.6015805169989217,
21209
+ "learning_rate": 2.3317099493445627e-07,
21210
+ "loss": 0.2208,
21211
+ "step": 2038
21212
+ },
21213
+ {
21214
+ "epoch": 0.9725733365132363,
21215
+ "grad_norm": 0.4847482121424171,
21216
+ "learning_rate": 2.2520591867531415e-07,
21217
+ "loss": 0.1811,
21218
+ "step": 2039
21219
+ },
21220
+ {
21221
+ "epoch": 0.97305032196518,
21222
+ "grad_norm": 0.59574749022119,
21223
+ "learning_rate": 2.173789526117187e-07,
21224
+ "loss": 0.1843,
21225
+ "step": 2040
21226
+ },
21227
+ {
21228
+ "epoch": 0.97305032196518,
21229
+ "eval_PRM Accuracy": 0.9215686274509803,
21230
+ "eval_PRM F1": 0.9452054794520548,
21231
+ "eval_PRM F1 AUC": 0.9036372224846481,
21232
+ "eval_PRM F1 AUC (fixed)": 0.9749645725082665,
21233
+ "eval_PRM F1 Neg": 0.8620689655172413,
21234
+ "eval_PRM NPV": 0.8620689655172413,
21235
+ "eval_PRM Precision": 0.9452054794520548,
21236
+ "eval_PRM Recall": 0.9452054794520548,
21237
+ "eval_PRM Specificty": 0.8620689655172413,
21238
+ "eval_loss": 0.18514181673526764,
21239
+ "eval_runtime": 13.4517,
21240
+ "eval_samples_per_second": 3.271,
21241
+ "eval_steps_per_second": 0.149,
21242
+ "step": 2040
21243
+ },
21244
+ {
21245
+ "epoch": 0.9735273074171238,
21246
+ "grad_norm": 0.5741048128836703,
21247
+ "learning_rate": 2.0969011846116992e-07,
21248
+ "loss": 0.2053,
21249
+ "step": 2041
21250
+ },
21251
+ {
21252
+ "epoch": 0.9740042928690675,
21253
+ "grad_norm": 0.5915322378907251,
21254
+ "learning_rate": 2.021394375578911e-07,
21255
+ "loss": 0.208,
21256
+ "step": 2042
21257
+ },
21258
+ {
21259
+ "epoch": 0.9744812783210112,
21260
+ "grad_norm": 0.49623775659482605,
21261
+ "learning_rate": 1.9472693085276218e-07,
21262
+ "loss": 0.1829,
21263
+ "step": 2043
21264
+ },
21265
+ {
21266
+ "epoch": 0.9749582637729549,
21267
+ "grad_norm": 0.8456364278385442,
21268
+ "learning_rate": 1.8745261891327526e-07,
21269
+ "loss": 0.3239,
21270
+ "step": 2044
21271
+ },
21272
+ {
21273
+ "epoch": 0.9754352492248987,
21274
+ "grad_norm": 0.5170849611642481,
21275
+ "learning_rate": 1.8031652192347925e-07,
21276
+ "loss": 0.2037,
21277
+ "step": 2045
21278
+ },
21279
+ {
21280
+ "epoch": 0.9754352492248987,
21281
+ "eval_PRM Accuracy": 0.9117647058823529,
21282
+ "eval_PRM F1": 0.9387755102040817,
21283
+ "eval_PRM F1 AUC": 0.8863958431743032,
21284
+ "eval_PRM F1 AUC (fixed)": 0.9747283892300425,
21285
+ "eval_PRM F1 Neg": 0.8421052631578947,
21286
+ "eval_PRM NPV": 0.8571428571428571,
21287
+ "eval_PRM Precision": 0.9324324324324325,
21288
+ "eval_PRM Recall": 0.9452054794520548,
21289
+ "eval_PRM Specificty": 0.8275862068965517,
21290
+ "eval_loss": 0.18571333587169647,
21291
+ "eval_runtime": 13.6831,
21292
+ "eval_samples_per_second": 3.216,
21293
+ "eval_steps_per_second": 0.146,
21294
+ "step": 2045
21295
+ },
21296
+ {
21297
+ "epoch": 0.9759122346768424,
21298
+ "grad_norm": 0.6496734494944529,
21299
+ "learning_rate": 1.7331865968391314e-07,
21300
+ "loss": 0.1692,
21301
+ "step": 2046
21302
+ },
21303
+ {
21304
+ "epoch": 0.976389220128786,
21305
+ "grad_norm": 0.5594479369809225,
21306
+ "learning_rate": 1.6645905161155606e-07,
21307
+ "loss": 0.1582,
21308
+ "step": 2047
21309
+ },
21310
+ {
21311
+ "epoch": 0.9768662055807298,
21312
+ "grad_norm": 0.7823614945934644,
21313
+ "learning_rate": 1.5973771673976623e-07,
21314
+ "loss": 0.316,
21315
+ "step": 2048
21316
+ },
21317
+ {
21318
+ "epoch": 0.9773431910326735,
21319
+ "grad_norm": 0.43263496968572235,
21320
+ "learning_rate": 1.5315467371824765e-07,
21321
+ "loss": 0.1619,
21322
+ "step": 2049
21323
+ },
21324
+ {
21325
+ "epoch": 0.9778201764846172,
21326
+ "grad_norm": 0.5713807971055265,
21327
+ "learning_rate": 1.4670994081297795e-07,
21328
+ "loss": 0.2004,
21329
+ "step": 2050
21330
+ },
21331
+ {
21332
+ "epoch": 0.9778201764846172,
21333
+ "eval_PRM Accuracy": 0.9117647058823529,
21334
+ "eval_PRM F1": 0.9387755102040817,
21335
+ "eval_PRM F1 AUC": 0.8863958431743032,
21336
+ "eval_PRM F1 AUC (fixed)": 0.9737836561171469,
21337
+ "eval_PRM F1 Neg": 0.8421052631578947,
21338
+ "eval_PRM NPV": 0.8571428571428571,
21339
+ "eval_PRM Precision": 0.9324324324324325,
21340
+ "eval_PRM Recall": 0.9452054794520548,
21341
+ "eval_PRM Specificty": 0.8275862068965517,
21342
+ "eval_loss": 0.18510852754116058,
21343
+ "eval_runtime": 13.7396,
21344
+ "eval_samples_per_second": 3.202,
21345
+ "eval_steps_per_second": 0.146,
21346
+ "step": 2050
21347
+ },
21348
+ {
21349
+ "epoch": 0.9782971619365609,
21350
+ "grad_norm": 0.6660396602843291,
21351
+ "learning_rate": 1.4040353590616395e-07,
21352
+ "loss": 0.2063,
21353
+ "step": 2051
21354
+ },
21355
+ {
21356
+ "epoch": 0.9787741473885047,
21357
+ "grad_norm": 0.49585433430640596,
21358
+ "learning_rate": 1.3423547649620838e-07,
21359
+ "loss": 0.1868,
21360
+ "step": 2052
21361
+ },
21362
+ {
21363
+ "epoch": 0.9792511328404484,
21364
+ "grad_norm": 0.555056426242968,
21365
+ "learning_rate": 1.2820577969762104e-07,
21366
+ "loss": 0.2221,
21367
+ "step": 2053
21368
+ },
21369
+ {
21370
+ "epoch": 0.979728118292392,
21371
+ "grad_norm": 0.6521947149470015,
21372
+ "learning_rate": 1.2231446224101327e-07,
21373
+ "loss": 0.2179,
21374
+ "step": 2054
21375
+ },
21376
+ {
21377
+ "epoch": 0.9802051037443358,
21378
+ "grad_norm": 0.4868305540978795,
21379
+ "learning_rate": 1.1656154047303691e-07,
21380
+ "loss": 0.1237,
21381
+ "step": 2055
21382
+ },
21383
+ {
21384
+ "epoch": 0.9802051037443358,
21385
+ "eval_PRM Accuracy": 0.9215686274509803,
21386
+ "eval_PRM F1": 0.9452054794520548,
21387
+ "eval_PRM F1 AUC": 0.9036372224846481,
21388
+ "eval_PRM F1 AUC (fixed)": 0.9737836561171469,
21389
+ "eval_PRM F1 Neg": 0.8620689655172413,
21390
+ "eval_PRM NPV": 0.8620689655172413,
21391
+ "eval_PRM Precision": 0.9452054794520548,
21392
+ "eval_PRM Recall": 0.9452054794520548,
21393
+ "eval_PRM Specificty": 0.8620689655172413,
21394
+ "eval_loss": 0.18632923066616058,
21395
+ "eval_runtime": 13.6312,
21396
+ "eval_samples_per_second": 3.228,
21397
+ "eval_steps_per_second": 0.147,
21398
+ "step": 2055
21399
+ },
21400
+ {
21401
+ "epoch": 0.9806820891962795,
21402
+ "grad_norm": 0.5598366438944131,
21403
+ "learning_rate": 1.1094703035633425e-07,
21404
+ "loss": 0.2018,
21405
+ "step": 2056
21406
+ },
21407
+ {
21408
+ "epoch": 0.9811590746482233,
21409
+ "grad_norm": 0.49904654459510944,
21410
+ "learning_rate": 1.0547094746949371e-07,
21411
+ "loss": 0.1783,
21412
+ "step": 2057
21413
+ },
21414
+ {
21415
+ "epoch": 0.9816360601001669,
21416
+ "grad_norm": 0.5393112228505368,
21417
+ "learning_rate": 1.001333070070054e-07,
21418
+ "loss": 0.1675,
21419
+ "step": 2058
21420
+ },
21421
+ {
21422
+ "epoch": 0.9821130455521107,
21423
+ "grad_norm": 0.578826032904967,
21424
+ "learning_rate": 9.493412377923339e-08,
21425
+ "loss": 0.1948,
21426
+ "step": 2059
21427
+ },
21428
+ {
21429
+ "epoch": 0.9825900310040544,
21430
+ "grad_norm": 0.5392258489199684,
21431
+ "learning_rate": 8.987341221235457e-08,
21432
+ "loss": 0.1956,
21433
+ "step": 2060
21434
+ },
21435
+ {
21436
+ "epoch": 0.9825900310040544,
21437
+ "eval_PRM Accuracy": 0.9215686274509803,
21438
+ "eval_PRM F1": 0.9452054794520548,
21439
+ "eval_PRM F1 AUC": 0.9036372224846481,
21440
+ "eval_PRM F1 AUC (fixed)": 0.9749645725082663,
21441
+ "eval_PRM F1 Neg": 0.8620689655172413,
21442
+ "eval_PRM NPV": 0.8620689655172413,
21443
+ "eval_PRM Precision": 0.9452054794520548,
21444
+ "eval_PRM Recall": 0.9452054794520548,
21445
+ "eval_PRM Specificty": 0.8620689655172413,
21446
+ "eval_loss": 0.18599630892276764,
21447
+ "eval_runtime": 14.498,
21448
+ "eval_samples_per_second": 3.035,
21449
+ "eval_steps_per_second": 0.138,
21450
+ "step": 2060
21451
+ },
21452
+ {
21453
+ "epoch": 0.9830670164559981,
21454
+ "grad_norm": 0.6616701345184373,
21455
+ "learning_rate": 8.495118634833655e-08,
21456
+ "loss": 0.1429,
21457
+ "step": 2061
21458
+ },
21459
+ {
21460
+ "epoch": 0.9835440019079418,
21461
+ "grad_norm": 0.7130294708477759,
21462
+ "learning_rate": 8.016745984488206e-08,
21463
+ "loss": 0.1512,
21464
+ "step": 2062
21465
+ },
21466
+ {
21467
+ "epoch": 0.9840209873598855,
21468
+ "grad_norm": 0.6976150540958784,
21469
+ "learning_rate": 7.552224597541235e-08,
21470
+ "loss": 0.23,
21471
+ "step": 2063
21472
+ },
21473
+ {
21474
+ "epoch": 0.9844979728118293,
21475
+ "grad_norm": 0.5420611759772007,
21476
+ "learning_rate": 7.101555762900614e-08,
21477
+ "loss": 0.2171,
21478
+ "step": 2064
21479
+ },
21480
+ {
21481
+ "epoch": 0.9849749582637729,
21482
+ "grad_norm": 0.46001325692877904,
21483
+ "learning_rate": 6.664740731037733e-08,
21484
+ "loss": 0.1264,
21485
+ "step": 2065
21486
+ },
21487
+ {
21488
+ "epoch": 0.9849749582637729,
21489
+ "eval_PRM Accuracy": 0.9117647058823529,
21490
+ "eval_PRM F1": 0.9387755102040817,
21491
+ "eval_PRM F1 AUC": 0.8863958431743032,
21492
+ "eval_PRM F1 AUC (fixed)": 0.9749645725082664,
21493
+ "eval_PRM F1 Neg": 0.8421052631578947,
21494
+ "eval_PRM NPV": 0.8571428571428571,
21495
+ "eval_PRM Precision": 0.9324324324324325,
21496
+ "eval_PRM Recall": 0.9452054794520548,
21497
+ "eval_PRM Specificty": 0.8275862068965517,
21498
+ "eval_loss": 0.1859685778617859,
21499
+ "eval_runtime": 14.424,
21500
+ "eval_samples_per_second": 3.05,
21501
+ "eval_steps_per_second": 0.139,
21502
+ "step": 2065
21503
+ },
21504
+ {
21505
+ "epoch": 0.9854519437157166,
21506
+ "grad_norm": 0.6372943877733731,
21507
+ "learning_rate": 6.241780713983624e-08,
21508
+ "loss": 0.1407,
21509
+ "step": 2066
21510
+ },
21511
+ {
21512
+ "epoch": 0.9859289291676604,
21513
+ "grad_norm": 0.8129639240032717,
21514
+ "learning_rate": 5.8326768853261825e-08,
21515
+ "loss": 0.1824,
21516
+ "step": 2067
21517
+ },
21518
+ {
21519
+ "epoch": 0.9864059146196041,
21520
+ "grad_norm": 0.4987915008637717,
21521
+ "learning_rate": 5.437430380206832e-08,
21522
+ "loss": 0.1687,
21523
+ "step": 2068
21524
+ },
21525
+ {
21526
+ "epoch": 0.9868829000715478,
21527
+ "grad_norm": 0.45563934893068897,
21528
+ "learning_rate": 5.056042295316643e-08,
21529
+ "loss": 0.1195,
21530
+ "step": 2069
21531
+ },
21532
+ {
21533
+ "epoch": 0.9873598855234915,
21534
+ "grad_norm": 0.6072972937744073,
21535
+ "learning_rate": 4.688513688893004e-08,
21536
+ "loss": 0.2232,
21537
+ "step": 2070
21538
+ },
21539
+ {
21540
+ "epoch": 0.9873598855234915,
21541
+ "eval_PRM Accuracy": 0.9117647058823529,
21542
+ "eval_PRM F1": 0.9387755102040817,
21543
+ "eval_PRM F1 AUC": 0.8863958431743032,
21544
+ "eval_PRM F1 AUC (fixed)": 0.9749645725082665,
21545
+ "eval_PRM F1 Neg": 0.8421052631578947,
21546
+ "eval_PRM NPV": 0.8571428571428571,
21547
+ "eval_PRM Precision": 0.9324324324324325,
21548
+ "eval_PRM Recall": 0.9452054794520548,
21549
+ "eval_PRM Specificty": 0.8275862068965517,
21550
+ "eval_loss": 0.185791015625,
21551
+ "eval_runtime": 13.7396,
21552
+ "eval_samples_per_second": 3.202,
21553
+ "eval_steps_per_second": 0.146,
21554
+ "step": 2070
21555
+ },
21556
+ {
21557
+ "epoch": 0.9878368709754353,
21558
+ "grad_norm": 0.7779254486583322,
21559
+ "learning_rate": 4.334845580718505e-08,
21560
+ "loss": 0.3258,
21561
+ "step": 2071
21562
+ },
21563
+ {
21564
+ "epoch": 0.988313856427379,
21565
+ "grad_norm": 0.5548725465171946,
21566
+ "learning_rate": 3.995038952117058e-08,
21567
+ "loss": 0.2461,
21568
+ "step": 2072
21569
+ },
21570
+ {
21571
+ "epoch": 0.9887908418793226,
21572
+ "grad_norm": 0.6449785112740309,
21573
+ "learning_rate": 3.669094745950008e-08,
21574
+ "loss": 0.1826,
21575
+ "step": 2073
21576
+ },
21577
+ {
21578
+ "epoch": 0.9892678273312664,
21579
+ "grad_norm": 0.58356724744049,
21580
+ "learning_rate": 3.357013866615022e-08,
21581
+ "loss": 0.2221,
21582
+ "step": 2074
21583
+ },
21584
+ {
21585
+ "epoch": 0.9897448127832101,
21586
+ "grad_norm": 0.3475024641711149,
21587
+ "learning_rate": 3.058797180044426e-08,
21588
+ "loss": 0.1303,
21589
+ "step": 2075
21590
+ },
21591
+ {
21592
+ "epoch": 0.9897448127832101,
21593
+ "eval_PRM Accuracy": 0.9215686274509803,
21594
+ "eval_PRM F1": 0.9452054794520548,
21595
+ "eval_PRM F1 AUC": 0.9036372224846481,
21596
+ "eval_PRM F1 AUC (fixed)": 0.9728389230042513,
21597
+ "eval_PRM F1 Neg": 0.8620689655172413,
21598
+ "eval_PRM NPV": 0.8620689655172413,
21599
+ "eval_PRM Precision": 0.9452054794520548,
21600
+ "eval_PRM Recall": 0.9452054794520548,
21601
+ "eval_PRM Specificty": 0.8620689655172413,
21602
+ "eval_loss": 0.18559126555919647,
21603
+ "eval_runtime": 13.3317,
21604
+ "eval_samples_per_second": 3.3,
21605
+ "eval_steps_per_second": 0.15,
21606
+ "step": 2075
21607
+ },
21608
+ {
21609
+ "epoch": 0.9902217982351539,
21610
+ "grad_norm": 0.41991971109136683,
21611
+ "learning_rate": 2.7744455136990976e-08,
21612
+ "loss": 0.1326,
21613
+ "step": 2076
21614
+ },
21615
+ {
21616
+ "epoch": 0.9906987836870975,
21617
+ "grad_norm": 0.6015953672635286,
21618
+ "learning_rate": 2.503959656571242e-08,
21619
+ "loss": 0.2018,
21620
+ "step": 2077
21621
+ },
21622
+ {
21623
+ "epoch": 0.9911757691390413,
21624
+ "grad_norm": 0.6075786716834879,
21625
+ "learning_rate": 2.2473403591777297e-08,
21626
+ "loss": 0.2541,
21627
+ "step": 2078
21628
+ },
21629
+ {
21630
+ "epoch": 0.991652754590985,
21631
+ "grad_norm": 0.49201559098251413,
21632
+ "learning_rate": 2.004588333560653e-08,
21633
+ "loss": 0.1541,
21634
+ "step": 2079
21635
+ },
21636
+ {
21637
+ "epoch": 0.9921297400429286,
21638
+ "grad_norm": 0.6828738882424833,
21639
+ "learning_rate": 1.7757042532845493e-08,
21640
+ "loss": 0.161,
21641
+ "step": 2080
21642
+ },
21643
+ {
21644
+ "epoch": 0.9921297400429286,
21645
+ "eval_PRM Accuracy": 0.9215686274509803,
21646
+ "eval_PRM F1": 0.9452054794520548,
21647
+ "eval_PRM F1 AUC": 0.9036372224846481,
21648
+ "eval_PRM F1 AUC (fixed)": 0.9740198393953707,
21649
+ "eval_PRM F1 Neg": 0.8620689655172413,
21650
+ "eval_PRM NPV": 0.8620689655172413,
21651
+ "eval_PRM Precision": 0.9452054794520548,
21652
+ "eval_PRM Recall": 0.9452054794520548,
21653
+ "eval_PRM Specificty": 0.8620689655172413,
21654
+ "eval_loss": 0.1857244372367859,
21655
+ "eval_runtime": 13.6084,
21656
+ "eval_samples_per_second": 3.233,
21657
+ "eval_steps_per_second": 0.147,
21658
+ "step": 2080
21659
+ },
21660
+ {
21661
+ "epoch": 0.9926067254948724,
21662
+ "grad_norm": 0.7375947051401394,
21663
+ "learning_rate": 1.5606887534341807e-08,
21664
+ "loss": 0.2017,
21665
+ "step": 2081
21666
+ },
21667
+ {
21668
+ "epoch": 0.9930837109468161,
21669
+ "grad_norm": 0.6238346413953868,
21670
+ "learning_rate": 1.3595424306139804e-08,
21671
+ "loss": 0.257,
21672
+ "step": 2082
21673
+ },
21674
+ {
21675
+ "epoch": 0.9935606963987599,
21676
+ "grad_norm": 0.4652925076099241,
21677
+ "learning_rate": 1.1722658429441646e-08,
21678
+ "loss": 0.1492,
21679
+ "step": 2083
21680
+ },
21681
+ {
21682
+ "epoch": 0.9940376818507035,
21683
+ "grad_norm": 0.4562941808058073,
21684
+ "learning_rate": 9.988595100612897e-09,
21685
+ "loss": 0.175,
21686
+ "step": 2084
21687
+ },
21688
+ {
21689
+ "epoch": 0.9945146673026473,
21690
+ "grad_norm": 0.3897288159212509,
21691
+ "learning_rate": 8.393239131171404e-09,
21692
+ "loss": 0.1457,
21693
+ "step": 2085
21694
+ },
21695
+ {
21696
+ "epoch": 0.9945146673026473,
21697
+ "eval_PRM Accuracy": 0.9215686274509803,
21698
+ "eval_PRM F1": 0.9452054794520548,
21699
+ "eval_PRM F1 AUC": 0.9036372224846481,
21700
+ "eval_PRM F1 AUC (fixed)": 0.9735474728389231,
21701
+ "eval_PRM F1 Neg": 0.8620689655172413,
21702
+ "eval_PRM NPV": 0.8620689655172413,
21703
+ "eval_PRM Precision": 0.9452054794520548,
21704
+ "eval_PRM Recall": 0.9452054794520548,
21705
+ "eval_PRM Specificty": 0.8620689655172413,
21706
+ "eval_loss": 0.18607954680919647,
21707
+ "eval_runtime": 14.1612,
21708
+ "eval_samples_per_second": 3.107,
21709
+ "eval_steps_per_second": 0.141,
21710
+ "step": 2085
21711
+ },
21712
+ {
21713
+ "epoch": 0.994991652754591,
21714
+ "grad_norm": 0.7132931890723386,
21715
+ "learning_rate": 6.936594947742902e-09,
21716
+ "loss": 0.2117,
21717
+ "step": 2086
21718
+ },
21719
+ {
21720
+ "epoch": 0.9954686382065348,
21721
+ "grad_norm": 0.6171413474024248,
21722
+ "learning_rate": 5.618666592088761e-09,
21723
+ "loss": 0.2049,
21724
+ "step": 2087
21725
+ },
21726
+ {
21727
+ "epoch": 0.9959456236584784,
21728
+ "grad_norm": 0.3289005124173065,
21729
+ "learning_rate": 4.43945772106158e-09,
21730
+ "loss": 0.1137,
21731
+ "step": 2088
21732
+ },
21733
+ {
21734
+ "epoch": 0.9964226091104221,
21735
+ "grad_norm": 0.694145334823455,
21736
+ "learning_rate": 3.3989716066162856e-09,
21737
+ "loss": 0.2319,
21738
+ "step": 2089
21739
+ },
21740
+ {
21741
+ "epoch": 0.9968995945623659,
21742
+ "grad_norm": 0.5523620831764013,
21743
+ "learning_rate": 2.497211135787936e-09,
21744
+ "loss": 0.1545,
21745
+ "step": 2090
21746
+ },
21747
+ {
21748
+ "epoch": 0.9968995945623659,
21749
+ "eval_PRM Accuracy": 0.9117647058823529,
21750
+ "eval_PRM F1": 0.9387755102040817,
21751
+ "eval_PRM F1 AUC": 0.8863958431743032,
21752
+ "eval_PRM F1 AUC (fixed)": 0.9744922059518186,
21753
+ "eval_PRM F1 Neg": 0.8421052631578947,
21754
+ "eval_PRM NPV": 0.8571428571428571,
21755
+ "eval_PRM Precision": 0.9324324324324325,
21756
+ "eval_PRM Recall": 0.9452054794520548,
21757
+ "eval_PRM Specificty": 0.8275862068965517,
21758
+ "eval_loss": 0.18575216829776764,
21759
+ "eval_runtime": 14.1793,
21760
+ "eval_samples_per_second": 3.103,
21761
+ "eval_steps_per_second": 0.141,
21762
+ "step": 2090
21763
+ },
21764
+ {
21765
+ "epoch": 0.9973765800143095,
21766
+ "grad_norm": 0.5435729439876591,
21767
+ "learning_rate": 1.7341788106972624e-09,
21768
+ "loss": 0.1771,
21769
+ "step": 2091
21770
+ },
21771
+ {
21772
+ "epoch": 0.9978535654662533,
21773
+ "grad_norm": 0.5245664971355852,
21774
+ "learning_rate": 1.1098767485284712e-09,
21775
+ "loss": 0.1802,
21776
+ "step": 2092
21777
+ },
21778
+ {
21779
+ "epoch": 0.998330550918197,
21780
+ "grad_norm": 0.5755856092820003,
21781
+ "learning_rate": 6.243066815347919e-10,
21782
+ "loss": 0.17,
21783
+ "step": 2093
21784
+ },
21785
+ {
21786
+ "epoch": 0.9988075363701407,
21787
+ "grad_norm": 0.5415094526687191,
21788
+ "learning_rate": 2.7746995702737644e-10,
21789
+ "loss": 0.1711,
21790
+ "step": 2094
21791
+ },
21792
+ {
21793
+ "epoch": 0.9992845218220844,
21794
+ "grad_norm": 0.5985415814340851,
21795
+ "learning_rate": 6.936753737529777e-11,
21796
+ "loss": 0.2475,
21797
+ "step": 2095
21798
+ },
21799
+ {
21800
+ "epoch": 0.9992845218220844,
21801
+ "eval_PRM Accuracy": 0.9117647058823529,
21802
+ "eval_PRM F1": 0.9387755102040817,
21803
+ "eval_PRM F1 AUC": 0.8863958431743032,
21804
+ "eval_PRM F1 AUC (fixed)": 0.9737836561171469,
21805
+ "eval_PRM F1 Neg": 0.8421052631578947,
21806
+ "eval_PRM NPV": 0.8571428571428571,
21807
+ "eval_PRM Precision": 0.9324324324324325,
21808
+ "eval_PRM Recall": 0.9452054794520548,
21809
+ "eval_PRM Specificty": 0.8275862068965517,
21810
+ "eval_loss": 0.18576326966285706,
21811
+ "eval_runtime": 14.0597,
21812
+ "eval_samples_per_second": 3.13,
21813
+ "eval_steps_per_second": 0.142,
21814
+ "step": 2095
21815
+ },
21816
+ {
21817
+ "epoch": 0.9997615072740281,
21818
+ "grad_norm": 0.6252395976260101,
21819
+ "learning_rate": 0.0,
21820
+ "loss": 0.2175,
21821
+ "step": 2096
21822
  }
21823
  ],
21824
  "logging_steps": 1,
 
21833
  "should_evaluate": false,
21834
  "should_log": false,
21835
  "should_save": true,
21836
+ "should_training_stop": true
21837
  },
21838
  "attributes": {}
21839
  }
21840
  },
21841
+ "total_flos": 2401999876456448.0,
21842
  "train_batch_size": 2,
21843
  "trial_name": null,
21844
  "trial_params": null