smirki committed on
Commit
a86b00a
·
verified ·
1 Parent(s): fd95d31

Training in progress, step 1825, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47a7f2a9e5514ded39fec42ccf85affb0ea83765d8cd5710f5ffae2ac93a7539
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0227de8dffd60e7bcbc361e28f5f14d86f6b8aa6b9faaa25078af2c1664371
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad0f2461614af492d300b806eb46f3bbcfd9aac2534006fb03315af5654f03d1
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b50241b912450499aa67b6f47d8ef5d57cc918130f305986edc730a6c70d0be
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8238871797f984a007bcc429ff439a023be817586923e09bedcced1680e3b8e1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc279ccf06d94b21f0f1142b3ba0467a4b037c890e7d4c8b4d0d9959c7a643b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5affcab53c6bf0c312245e4ec3117742a7ae09a65ad0d199a0c62a7385ad2300
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e18d4bd19d02103826c6ccfe1e046ad882c768a3c57be1799d9b12107011c97
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.014335661550959295,
5
  "eval_steps": 500,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2167,6 +2167,30 @@
2167
  "reward_std": 0.28967257887125014,
2168
  "rewards/custom_reward_simplified_v7_dblog": 0.75,
2169
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2170
  }
2171
  ],
2172
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.014534767961389285,
5
  "eval_steps": 500,
6
+ "global_step": 1825,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2167
  "reward_std": 0.28967257887125014,
2168
  "rewards/custom_reward_simplified_v7_dblog": 0.75,
2169
  "step": 1800
2170
+ },
2171
+ {
2172
+ "completion_length": 689.26875,
2173
+ "epoch": 0.01441530411513129,
2174
+ "grad_norm": 0.17589329183101654,
2175
+ "kl": 0.016255489736795425,
2176
+ "learning_rate": 8.653477618573261e-07,
2177
+ "loss": 0.0007,
2178
+ "reward": 0.765625,
2179
+ "reward_std": 0.3363394603133202,
2180
+ "rewards/custom_reward_simplified_v7_dblog": 0.765625,
2181
+ "step": 1810
2182
+ },
2183
+ {
2184
+ "completion_length": 640.91875,
2185
+ "epoch": 0.014494946679303287,
2186
+ "grad_norm": 0.21075929701328278,
2187
+ "kl": 0.015922663966193795,
2188
+ "learning_rate": 8.380103359651554e-07,
2189
+ "loss": 0.0006,
2190
+ "reward": 0.925,
2191
+ "reward_std": 0.3459245666861534,
2192
+ "rewards/custom_reward_simplified_v7_dblog": 0.925,
2193
+ "step": 1820
2194
  }
2195
  ],
2196
  "logging_steps": 10,