smirki commited on
Commit
644565a
·
verified ·
1 Parent(s): 0f6667b

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8552145847bfeab4636144433c43db62dcc91675e6d80bf8a61ef9290e9fc440
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7927585b38c284985305607dff1216956b2ec6766011237d98f764aa65d8a68
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eea82d271c17fd83a4656876cadcc33c1faa5ce883f21449769fbb145e1864a9
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7ca2ba371f72eb8e0be449ebbd259143484bb427530b2555406e081648b613
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee66f0b7a716e07b51f02c7e7cb42ecc8627dd065518adcf55ce909d0ef818d8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7455eeaa206bf36d4b15096f6439d20efc5b6e48cc80079c496864649b7e925
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c12eb55eb955504de5fa1d8a599d32de65813670b5001ef30e45d7a6b99dbe1f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6841cf52b84fc804fe9e82d0922149bacf5c720074060a8df77995cba4f40aaa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.004778553850319765,
5
  "eval_steps": 500,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -727,6 +727,66 @@
727
  "reward_std": 0.1257291093468666,
728
  "rewards/custom_reward_logic_v2": 0.12624999806284903,
729
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005176766671179746,
5
  "eval_steps": 500,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
727
  "reward_std": 0.1257291093468666,
728
  "rewards/custom_reward_logic_v2": 0.12624999806284903,
729
  "step": 600
730
+ },
731
+ {
732
+ "completion_length": 20.73125,
733
+ "epoch": 0.004858196414491761,
734
+ "grad_norm": 1.0636727809906006,
735
+ "kl": 0.32965768277645113,
736
+ "learning_rate": 8.110244809608494e-07,
737
+ "loss": 0.0132,
738
+ "reward": 0.1900000013411045,
739
+ "reward_std": 0.2276224449276924,
740
+ "rewards/custom_reward_logic_v2": 0.1900000013411045,
741
+ "step": 610
742
+ },
743
+ {
744
+ "completion_length": 21.09375,
745
+ "epoch": 0.004937838978663757,
746
+ "grad_norm": 0.6413007378578186,
747
+ "kl": 0.4171911731362343,
748
+ "learning_rate": 7.322330470336314e-07,
749
+ "loss": 0.0167,
750
+ "reward": 0.06624999977648258,
751
+ "reward_std": 0.08008950427174569,
752
+ "rewards/custom_reward_logic_v2": 0.06624999977648258,
753
+ "step": 620
754
+ },
755
+ {
756
+ "completion_length": 19.6125,
757
+ "epoch": 0.005017481542835753,
758
+ "grad_norm": 0.4128471612930298,
759
+ "kl": 0.3929149940609932,
760
+ "learning_rate": 6.568066579746901e-07,
761
+ "loss": 0.0157,
762
+ "reward": 0.1518750011920929,
763
+ "reward_std": 0.09802244454622269,
764
+ "rewards/custom_reward_logic_v2": 0.1518750011920929,
765
+ "step": 630
766
+ },
767
+ {
768
+ "completion_length": 18.51875,
769
+ "epoch": 0.00509712410700775,
770
+ "grad_norm": 1.2197966575622559,
771
+ "kl": 0.4328078910708427,
772
+ "learning_rate": 5.848888922025553e-07,
773
+ "loss": 0.0173,
774
+ "reward": 0.045000001043081286,
775
+ "reward_std": 0.08135274946689605,
776
+ "rewards/custom_reward_logic_v2": 0.045000001043081286,
777
+ "step": 640
778
+ },
779
+ {
780
+ "completion_length": 19.89375,
781
+ "epoch": 0.005176766671179746,
782
+ "grad_norm": 0.24087023735046387,
783
+ "kl": 0.39142851531505585,
784
+ "learning_rate": 5.166166492719124e-07,
785
+ "loss": 0.0157,
786
+ "reward": 0.0818750023841858,
787
+ "reward_std": 0.11013087928295136,
788
+ "rewards/custom_reward_logic_v2": 0.0818750023841858,
789
+ "step": 650
790
  }
791
  ],
792
  "logging_steps": 10,