smirki commited on
Commit
ebe1bb8
·
verified ·
1 Parent(s): a95fcec

Training in progress, step 1300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bad81dc96084ab5ddee74a4664e886906e4be5ca86b50b0a9fb72bc45570188
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e10e53c76f7ff04829e808c63c93839ebd5429cd67f49bbfeea277b83d07a7
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:732a3e23bab6a35c7c0b1fbd3eab25075735978514241e667277b9927b941f9d
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3cd44e38766e505ffcc5fc493c2a4306d24f1992cf69d3a5db1a4116a556db
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0622740b7f529d67896bed5f8ff648e5729882e67681daaf8eb3ef3cf2b65e92
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e345de18b4130f225fcea63e9859bfd38ec06f643e0ce309fd7f3ed7f28326
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fb41e5de709c0ea44e9bca507118fe887f61feeb314b07045c5c6637c0e7194
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dccb9bdfb0022ab0fbffb27c30bb70e7698ed3c92675c6050a89f10b3cdf6042
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0101544269319295,
5
  "eval_steps": 500,
6
- "global_step": 1275,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1531,6 +1531,42 @@
1531
  "reward_std": 0.2156815566122532,
1532
  "rewards/custom_reward_simplified_v7_dblog": 0.60625,
1533
  "step": 1270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1534
  }
1535
  ],
1536
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.010353533342359491,
5
  "eval_steps": 500,
6
+ "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1531
  "reward_std": 0.2156815566122532,
1532
  "rewards/custom_reward_simplified_v7_dblog": 0.60625,
1533
  "step": 1270
1534
+ },
1535
+ {
1536
+ "completion_length": 641.9125,
1537
+ "epoch": 0.0101942482140155,
1538
+ "grad_norm": 0.21494239568710327,
1539
+ "kl": 0.008675340004265309,
1540
+ "learning_rate": 2.6453620722761897e-06,
1541
+ "loss": 0.0003,
1542
+ "reward": 0.81875,
1543
+ "reward_std": 0.22831376343965532,
1544
+ "rewards/custom_reward_simplified_v7_dblog": 0.81875,
1545
+ "step": 1280
1546
+ },
1547
+ {
1548
+ "completion_length": 650.5,
1549
+ "epoch": 0.010273890778187494,
1550
+ "grad_norm": 0.22972695529460907,
1551
+ "kl": 0.008116158202756196,
1552
+ "learning_rate": 2.6090484684133406e-06,
1553
+ "loss": 0.0003,
1554
+ "reward": 0.921875,
1555
+ "reward_std": 0.2564812809228897,
1556
+ "rewards/custom_reward_simplified_v7_dblog": 0.921875,
1557
+ "step": 1290
1558
+ },
1559
+ {
1560
+ "completion_length": 657.94375,
1561
+ "epoch": 0.010353533342359491,
1562
+ "grad_norm": 0.15338486433029175,
1563
+ "kl": 0.009256175020709634,
1564
+ "learning_rate": 2.572711796857779e-06,
1565
+ "loss": 0.0004,
1566
+ "reward": 0.709375,
1567
+ "reward_std": 0.21537503451108933,
1568
+ "rewards/custom_reward_simplified_v7_dblog": 0.709375,
1569
+ "step": 1300
1570
  }
1571
  ],
1572
  "logging_steps": 10,