smirki commited on
Commit
38c84d4
·
verified ·
1 Parent(s): d6bda04

Training in progress, step 1175, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b87a7124fc823146a12cbeaefbe5ff8d91326cefab763555aa61dbbb9e056fab
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780adb9accd35e89fa4d6e0b16d07afb98a5ad312c157c14f5c6fc3688b6b435
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f14effc9368161c55b1d77fbb7e90bb1cd9af0086afde89e1a683d869fb3248
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8186daf7088705064b8cda2fa05ea5b15ac3829a5cc745cfd0463cf0775d065
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b6ca00cea62eabd929fd77d62694b267d47c0603af124f659274f264d0bf70b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4eda2caa8c1d334010b2826a85bd0fa5d6353a1455cace044b4ee905e374a02
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a1625baaf061099e4aa140afa72e1a2a32a1a3e25d7baf8a019a63d52d6e43
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7aa6200e076e6b4e3b8c147bb3db3fdb1f862f67341ccf617eae90a633431d6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.009158894879779549,
5
  "eval_steps": 500,
6
- "global_step": 1150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1387,6 +1387,30 @@
1387
  "reward_std": 0.29204289317131044,
1388
  "rewards/custom_reward_simplified_v7_dblog": 0.6875,
1389
  "step": 1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1390
  }
1391
  ],
1392
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.00935800129020954,
5
  "eval_steps": 500,
6
+ "global_step": 1175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1387
  "reward_std": 0.29204289317131044,
1388
  "rewards/custom_reward_simplified_v7_dblog": 0.6875,
1389
  "step": 1150
1390
+ },
1391
+ {
1392
+ "completion_length": 641.88125,
1393
+ "epoch": 0.009238537443951545,
1394
+ "grad_norm": 0.007003675680607557,
1395
+ "kl": 0.007272082474082708,
1396
+ "learning_rate": 3.0765396768561005e-06,
1397
+ "loss": 0.0003,
1398
+ "reward": 0.875,
1399
+ "reward_std": 0.2666669487953186,
1400
+ "rewards/custom_reward_simplified_v7_dblog": 0.875,
1401
+ "step": 1160
1402
+ },
1403
+ {
1404
+ "completion_length": 645.55625,
1405
+ "epoch": 0.009318180008123542,
1406
+ "grad_norm": 0.005993107333779335,
1407
+ "kl": 0.00769920782186091,
1408
+ "learning_rate": 3.0410990348452572e-06,
1409
+ "loss": 0.0003,
1410
+ "reward": 0.846875,
1411
+ "reward_std": 0.29315834268927576,
1412
+ "rewards/custom_reward_simplified_v7_dblog": 0.846875,
1413
+ "step": 1170
1414
  }
1415
  ],
1416
  "logging_steps": 10,