smirki commited on
Commit
8ab529d
·
verified ·
1 Parent(s): 24d5f3c

Training in progress, step 1950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:400bf9743fcfff3f47e3f0b9a1cede38e8d6e96374ac2cd587f2d2edfd906572
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3982e7c096a70a0347821728b97811479223f1df6099204e37383837d91f6f5
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3309a30e849ace9608e1957368fc06f650c97cc91eaa6df6d4bf2f6b649868f3
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc874fd946b4a641da998c3cb0bfa07fd14122b6f28011d2891746400a37ff9
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c65db11f2bbb866945208742b8cd4b8865acadf113d5d9fbfe55b269b5ff1059
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81ead0fc04d8776eae671d9a86963a7d3e1d3d1b066678dcfa501494dc5b51a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deeb8215b1ed4392892b832a6e768b3c4ae9ca65d4af274686a16e7d74532396
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91dcb5b2d000f105f4d6d1ae7170d8fafaf43f44a4f442aeb38a35f6c7a13bc9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.015331193603109246,
5
  "eval_steps": 500,
6
- "global_step": 1925,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2311,6 +2311,42 @@
2311
  "reward_std": 0.38373097851872445,
2312
  "rewards/custom_reward_simplified_v7_dblog": 0.834375,
2313
  "step": 1920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2314
  }
2315
  ],
2316
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.015530300013539236,
5
  "eval_steps": 500,
6
+ "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2311
  "reward_std": 0.38373097851872445,
2312
  "rewards/custom_reward_simplified_v7_dblog": 0.834375,
2313
  "step": 1920
2314
+ },
2315
+ {
2316
+ "completion_length": 688.56875,
2317
+ "epoch": 0.015371014885195244,
2318
+ "grad_norm": 0.22155120968818665,
2319
+ "kl": 0.025313653564080597,
2320
+ "learning_rate": 5.617198567963353e-07,
2321
+ "loss": 0.001,
2322
+ "reward": 0.64375,
2323
+ "reward_std": 0.2539114162325859,
2324
+ "rewards/custom_reward_simplified_v7_dblog": 0.64375,
2325
+ "step": 1930
2326
+ },
2327
+ {
2328
+ "completion_length": 676.9125,
2329
+ "epoch": 0.01545065744936724,
2330
+ "grad_norm": 0.2373446673154831,
2331
+ "kl": 0.018907574540935456,
2332
+ "learning_rate": 5.389608377010608e-07,
2333
+ "loss": 0.0008,
2334
+ "reward": 0.821875,
2335
+ "reward_std": 0.1906539335846901,
2336
+ "rewards/custom_reward_simplified_v7_dblog": 0.821875,
2337
+ "step": 1940
2338
+ },
2339
+ {
2340
+ "completion_length": 640.675,
2341
+ "epoch": 0.015530300013539236,
2342
+ "grad_norm": 0.1865774542093277,
2343
+ "kl": 0.014899229886941612,
2344
+ "learning_rate": 5.166166492719124e-07,
2345
+ "loss": 0.0006,
2346
+ "reward": 0.725,
2347
+ "reward_std": 0.2747412838041782,
2348
+ "rewards/custom_reward_simplified_v7_dblog": 0.725,
2349
+ "step": 1950
2350
  }
2351
  ],
2352
  "logging_steps": 10,