smirki commited on
Commit
fd8937a
·
verified ·
1 Parent(s): b0cb717

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005b3402f16489e2f67dfdc3d23e9b70931743c9bfd90fbe0b9df9c0fcaa7181
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df9b8df9a3bcfa7793084f2c491e074f5bd3c4876f1de283980446d052bd9416
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87812fd1302b0602159fd01cdae3acd4150d67e06d7b2daaf6949356ea5d360e
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa436fad63bef13e1a414cbf3f838549d7499163af0e47cc9779ce02592bd406
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f14edd6888bcd521e87d23d882bc9d0658a0613355ce894e4f94a5b1eda5455
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7c52c324bc53ae464edbc3c1cfa99789369f21c7d8544ae8559cb215adb050
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36288734d02808044e8b560e68ecd8751d3692a2ff33e2976d9cbec75c95c60f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a89f840a88cd313584cc217e93a9306c165d4938e58831fcfab48fea824fe7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005774085902469716,
5
  "eval_steps": 500,
6
- "global_step": 725,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -871,6 +871,42 @@
871
  "reward_std": 0.25477964654564855,
872
  "rewards/custom_reward_simplified_v7_dblog": 0.634375,
873
  "step": 720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
  }
875
  ],
876
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005973192312899706,
5
  "eval_steps": 500,
6
+ "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
871
  "reward_std": 0.25477964654564855,
872
  "rewards/custom_reward_simplified_v7_dblog": 0.634375,
873
  "step": 720
874
+ },
875
+ {
876
+ "completion_length": 607.86875,
877
+ "epoch": 0.005813907184555714,
878
+ "grad_norm": 0.20680995285511017,
879
+ "kl": 0.0055589195340871814,
880
+ "learning_rate": 4.391536957168733e-06,
881
+ "loss": 0.0002,
882
+ "reward": 0.8,
883
+ "reward_std": 0.32480863481760025,
884
+ "rewards/custom_reward_simplified_v7_dblog": 0.8,
885
+ "step": 730
886
+ },
887
+ {
888
+ "completion_length": 674.13125,
889
+ "epoch": 0.00589354974872771,
890
+ "grad_norm": 0.005594769027084112,
891
+ "kl": 0.005972519854549318,
892
+ "learning_rate": 4.367562678102491e-06,
893
+ "loss": 0.0002,
894
+ "reward": 0.665625,
895
+ "reward_std": 0.20820673778653145,
896
+ "rewards/custom_reward_simplified_v7_dblog": 0.665625,
897
+ "step": 740
898
+ },
899
+ {
900
+ "completion_length": 639.69375,
901
+ "epoch": 0.005973192312899706,
902
+ "grad_norm": 0.11012833565473557,
903
+ "kl": 0.005814655229914934,
904
+ "learning_rate": 4.34319334202531e-06,
905
+ "loss": 0.0002,
906
+ "reward": 0.796875,
907
+ "reward_std": 0.34761993661522866,
908
+ "rewards/custom_reward_simplified_v7_dblog": 0.796875,
909
+ "step": 750
910
  }
911
  ],
912
  "logging_steps": 10,