smirki commited on
Commit
0311271
·
verified ·
1 Parent(s): 58bc831

Training in progress, step 1925, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:013e1a4a2e514b45c1477302c11e1065adcc3bbe1c09bc675409e2c5ae75c7df
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:400bf9743fcfff3f47e3f0b9a1cede38e8d6e96374ac2cd587f2d2edfd906572
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea3f86dcd6bd8021e80606e175ae4af07751c3370190d4d053658d8ae6e55f26
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3309a30e849ace9608e1957368fc06f650c97cc91eaa6df6d4bf2f6b649868f3
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81580e1e88878dfebd843045199ab5edec2e92f7c132f2ea86bcce1ce7f5e2ef
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c65db11f2bbb866945208742b8cd4b8865acadf113d5d9fbfe55b269b5ff1059
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875915e5d6f2e2f0e0a7a4a850b61c3c410ff67126d9daeb7cdaa64d28801ee5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deeb8215b1ed4392892b832a6e768b3c4ae9ca65d4af274686a16e7d74532396
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.015132087192679255,
5
  "eval_steps": 500,
6
- "global_step": 1900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2287,6 +2287,30 @@
2287
  "reward_std": 0.2382744610309601,
2288
  "rewards/custom_reward_simplified_v7_dblog": 0.725,
2289
  "step": 1900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2290
  }
2291
  ],
2292
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.015331193603109246,
5
  "eval_steps": 500,
6
+ "global_step": 1925,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2287
  "reward_std": 0.2382744610309601,
2288
  "rewards/custom_reward_simplified_v7_dblog": 0.725,
2289
  "step": 1900
2290
+ },
2291
+ {
2292
+ "completion_length": 767.7375,
2293
+ "epoch": 0.015211729756851252,
2294
+ "grad_norm": 0.1330222189426422,
2295
+ "kl": 0.02190765142440796,
2296
+ "learning_rate": 6.084630428312679e-07,
2297
+ "loss": 0.0009,
2298
+ "reward": 0.66875,
2299
+ "reward_std": 0.27546602860093117,
2300
+ "rewards/custom_reward_simplified_v7_dblog": 0.66875,
2301
+ "step": 1910
2302
+ },
2303
+ {
2304
+ "completion_length": 726.63125,
2305
+ "epoch": 0.015291372321023247,
2306
+ "grad_norm": 0.21655875444412231,
2307
+ "kl": 0.02581467442214489,
2308
+ "learning_rate": 5.848888922025553e-07,
2309
+ "loss": 0.001,
2310
+ "reward": 0.834375,
2311
+ "reward_std": 0.38373097851872445,
2312
+ "rewards/custom_reward_simplified_v7_dblog": 0.834375,
2313
+ "step": 1920
2314
  }
2315
  ],
2316
  "logging_steps": 10,