smirki commited on
Commit
277f1b5
·
verified ·
1 Parent(s): f152b47

Training in progress, step 725, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5a04e84830bbd6a9d15a7f1b7837c35b2c5aa3c3d810fb936a39fabd501f732
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005b3402f16489e2f67dfdc3d23e9b70931743c9bfd90fbe0b9df9c0fcaa7181
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9d953155e50b0ab53fcf21d7698c3548b16bde8faffe38265b83b4c176691fe
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87812fd1302b0602159fd01cdae3acd4150d67e06d7b2daaf6949356ea5d360e
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d69b3ec032c894f09c29bf70e7979ef1f260d871795cce9f40bc39aed3eb516f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f14edd6888bcd521e87d23d882bc9d0658a0613355ce894e4f94a5b1eda5455
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29fe942acb075168b131d317a2f6faee5c51c7e00aad3609dac6f7c5e3261669
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36288734d02808044e8b560e68ecd8751d3692a2ff33e2976d9cbec75c95c60f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005574979492039726,
5
  "eval_steps": 500,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -847,6 +847,30 @@
847
  "reward_std": 0.34091843143105505,
848
  "rewards/custom_reward_simplified_v7_dblog": 0.80625,
849
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
  }
851
  ],
852
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005774085902469716,
5
  "eval_steps": 500,
6
+ "global_step": 725,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
847
  "reward_std": 0.34091843143105505,
848
  "rewards/custom_reward_simplified_v7_dblog": 0.80625,
849
  "step": 700
850
+ },
851
+ {
852
+ "completion_length": 725.675,
853
+ "epoch": 0.005654622056211722,
854
+ "grad_norm": 0.18473494052886963,
855
+ "kl": 0.005652935197576881,
856
+ "learning_rate": 4.438280143203665e-06,
857
+ "loss": 0.0002,
858
+ "reward": 0.66875,
859
+ "reward_std": 0.216452856361866,
860
+ "rewards/custom_reward_simplified_v7_dblog": 0.66875,
861
+ "step": 710
862
+ },
863
+ {
864
+ "completion_length": 764.26875,
865
+ "epoch": 0.005734264620383718,
866
+ "grad_norm": 0.17735017836093903,
867
+ "kl": 0.005824547982774675,
868
+ "learning_rate": 4.415111107797445e-06,
869
+ "loss": 0.0002,
870
+ "reward": 0.634375,
871
+ "reward_std": 0.25477964654564855,
872
+ "rewards/custom_reward_simplified_v7_dblog": 0.634375,
873
+ "step": 720
874
  }
875
  ],
876
  "logging_steps": 10,