smirki commited on
Commit
8c91d51
·
verified ·
1 Parent(s): 29448e1

Training in progress, step 1575, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4620c952d9d47f51b52dbaa4d676d99f0fa4424c13c706daf27aef599e4f2780
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cacbbfc56d391184888aa9988c9623102b11e0c5c6148c788c143ab677975a0
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dcd3d1e5ce9aeb2f3a0a439162a6354e3a87bdd42c2d824aa351222059be0bd
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb171f10034814744a50a6706f15be399005a3d17ff58bf5a682ba1f7e18f775
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3862494dcb8330500216500c3c10e90221d897be72203e9a311c19db30c18205
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876f813d0b3daa10f80151c471970ca9ac357d5142b2911eac6c7f304a8b5d03
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbfbb7156838856bc44aea3d88f369f30c1abce4e6c21899507c2f357e97b51c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d7fb31ee0a8472be5b464827ea90bb43e1dac4d2373a8313c33126d4dc9adc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012344597446659392,
5
  "eval_steps": 500,
6
- "global_step": 1550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1867,6 +1867,30 @@
1867
  "reward_std": 0.3321776181459427,
1868
  "rewards/custom_reward_simplified_v7_dblog": 0.84375,
1869
  "step": 1550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1870
  }
1871
  ],
1872
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.012543703857089384,
5
  "eval_steps": 500,
6
+ "global_step": 1575,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1867
  "reward_std": 0.3321776181459427,
1868
  "rewards/custom_reward_simplified_v7_dblog": 0.84375,
1869
  "step": 1550
1870
+ },
1871
+ {
1872
+ "completion_length": 645.28125,
1873
+ "epoch": 0.01242424001083139,
1874
+ "grad_norm": 0.1851159930229187,
1875
+ "kl": 0.014482964109629393,
1876
+ "learning_rate": 1.6449496416858285e-06,
1877
+ "loss": 0.0006,
1878
+ "reward": 0.85625,
1879
+ "reward_std": 0.20507382601499557,
1880
+ "rewards/custom_reward_simplified_v7_dblog": 0.85625,
1881
+ "step": 1560
1882
+ },
1883
+ {
1884
+ "completion_length": 614.08125,
1885
+ "epoch": 0.012503882575003384,
1886
+ "grad_norm": 0.27418458461761475,
1887
+ "kl": 0.013118641986511647,
1888
+ "learning_rate": 1.6108730954628093e-06,
1889
+ "loss": 0.0005,
1890
+ "reward": 0.79375,
1891
+ "reward_std": 0.2820776253938675,
1892
+ "rewards/custom_reward_simplified_v7_dblog": 0.79375,
1893
+ "step": 1570
1894
  }
1895
  ],
1896
  "logging_steps": 10,