smirki committed on
Commit
1c79f4b
·
verified ·
1 Parent(s): 3edeba4

Training in progress, step 375, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8de23388570c9c810f5c012fe5f79293c683389ee14efbf7548903bedc02443c
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f24d276f7fb7916ab15b39da6e2b88fa4797d8da0140e77a25bceeaed815bc3
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c1f16e4f6ca73d6b0d0b653f80247fdd3507861ab8750db5c0fca342cc5adf0
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51ab91babec334efadc5b34fbf32381f2ed2d09198a2b7a61be4e6940e7259f8
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:631bee6c95d1479c29399db52e8b982c9c130ac80a19f6b7aa7d80cfc04f08ec
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcafde9e1f0f9ecf6d844d2ef33445b6716f1049ad07534eb5ed4be8ce625d0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b017f223f7cbfcbd91ec9cfbd378a9bc9c27e3fa5227c019815cc49a05577b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ee62f39915fab64d2f1ab670d54e213cfc2d70e4cd48a7b1400797011d55aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.002787489746019863,
5
  "eval_steps": 500,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,30 @@
427
  "reward_std": 0.3807508498430252,
428
  "rewards/custom_reward_simplified_v7_dblog": 0.74375,
429
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.002986596156449853,
5
  "eval_steps": 500,
6
+ "global_step": 375,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "reward_std": 0.3807508498430252,
428
  "rewards/custom_reward_simplified_v7_dblog": 0.74375,
429
  "step": 350
430
+ },
431
+ {
432
+ "completion_length": 732.4375,
433
+ "epoch": 0.002867132310191859,
434
+ "grad_norm": 0.21898534893989563,
435
+ "kl": 0.002607938600704074,
436
+ "learning_rate": 4.962019382530521e-06,
437
+ "loss": 0.0001,
438
+ "reward": 0.596875,
439
+ "reward_std": 0.303117785602808,
440
+ "rewards/custom_reward_simplified_v7_dblog": 0.596875,
441
+ "step": 360
442
+ },
443
+ {
444
+ "completion_length": 703.21875,
445
+ "epoch": 0.002946774874363855,
446
+ "grad_norm": 0.20463985204696655,
447
+ "kl": 0.0030091375578194858,
448
+ "learning_rate": 4.955445176176577e-06,
449
+ "loss": 0.0001,
450
+ "reward": 0.746875,
451
+ "reward_std": 0.28880608528852464,
452
+ "rewards/custom_reward_simplified_v7_dblog": 0.746875,
453
+ "step": 370
454
  }
455
  ],
456
  "logging_steps": 10,