smirki commited on
Commit
900f91b
·
verified ·
1 Parent(s): c6f1841

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7927585b38c284985305607dff1216956b2ec6766011237d98f764aa65d8a68
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e023b69876a28967f68d2e2ca3f8be837b2c4981d6e3ad2049a01c6786eaa312
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad7ca2ba371f72eb8e0be449ebbd259143484bb427530b2555406e081648b613
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d213ee4b82282a82674cd9ce017085e7a7b3603c3dad380d9e3921fa872458b
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7455eeaa206bf36d4b15096f6439d20efc5b6e48cc80079c496864649b7e925
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df73025cee4dddc3e485bae9a5251784ca346a6e5d55d7c1b83f877ae248859a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6841cf52b84fc804fe9e82d0922149bacf5c720074060a8df77995cba4f40aaa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98b9203fd15f057816eda32716436bfe57e83707db8ff60acfc089588f9da41
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005176766671179746,
5
  "eval_steps": 500,
6
- "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -787,6 +787,66 @@
787
  "reward_std": 0.11013087928295136,
788
  "rewards/custom_reward_logic_v2": 0.0818750023841858,
789
  "step": 650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  }
791
  ],
792
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005574979492039726,
5
  "eval_steps": 500,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
787
  "reward_std": 0.11013087928295136,
788
  "rewards/custom_reward_logic_v2": 0.0818750023841858,
789
  "step": 650
790
+ },
791
+ {
792
+ "completion_length": 19.23125,
793
+ "epoch": 0.005256409235351742,
794
+ "grad_norm": 1.1138290166854858,
795
+ "kl": 0.3609082795679569,
796
+ "learning_rate": 4.5211988927752026e-07,
797
+ "loss": 0.0144,
798
+ "reward": 0.13375000059604644,
799
+ "reward_std": 0.22290636524558066,
800
+ "rewards/custom_reward_logic_v2": 0.13375000059604644,
801
+ "step": 660
802
+ },
803
+ {
804
+ "completion_length": 21.1375,
805
+ "epoch": 0.005336051799523738,
806
+ "grad_norm": 0.7887033820152283,
807
+ "kl": 0.36398947462439535,
808
+ "learning_rate": 3.915213854677863e-07,
809
+ "loss": 0.0146,
810
+ "reward": 0.07750000171363354,
811
+ "reward_std": 0.09986742436885834,
812
+ "rewards/custom_reward_logic_v2": 0.07750000171363354,
813
+ "step": 670
814
+ },
815
+ {
816
+ "completion_length": 20.7375,
817
+ "epoch": 0.005415694363695734,
818
+ "grad_norm": 1.2118674516677856,
819
+ "kl": 0.34819948896765707,
820
+ "learning_rate": 3.3493649053890325e-07,
821
+ "loss": 0.0139,
822
+ "reward": 0.14000000059604645,
823
+ "reward_std": 0.15659263283014296,
824
+ "rewards/custom_reward_logic_v2": 0.14000000059604645,
825
+ "step": 680
826
+ },
827
+ {
828
+ "completion_length": 25.03125,
829
+ "epoch": 0.00549533692786773,
830
+ "grad_norm": 0.6678434014320374,
831
+ "kl": 0.3506194405257702,
832
+ "learning_rate": 2.8247291705444575e-07,
833
+ "loss": 0.014,
834
+ "reward": 0.10087500289082527,
835
+ "reward_std": 0.19995234534144402,
836
+ "rewards/custom_reward_logic_v2": 0.10087500289082527,
837
+ "step": 690
838
+ },
839
+ {
840
+ "completion_length": 19.75,
841
+ "epoch": 0.005574979492039726,
842
+ "grad_norm": 1.028297781944275,
843
+ "kl": 0.33339232876896857,
844
+ "learning_rate": 2.3423053240837518e-07,
845
+ "loss": 0.0133,
846
+ "reward": 0.09312500022351741,
847
+ "reward_std": 0.09688087031245232,
848
+ "rewards/custom_reward_logic_v2": 0.09312500022351741,
849
+ "step": 700
850
  }
851
  ],
852
  "logging_steps": 10,