smirki committed on
Commit
43fb6f2
·
verified ·
1 Parent(s): aacfa7b

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74682732035656f7a989ee44f1f36f8e416b584aa4564d6a3765d685f4981071
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b901f880fa9ed0f060f71cce70c866baf6b12ff9b46744f1cd5db7d6a751126
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:337ffc24497931d1740d801f5febab092071f825b7858ec9a867066b6a02e90c
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0632dbbf29d2931fe7ce743e19d0f3cf69ee7a7d9e92be339d9792d785000ee2
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57cab1721b5ddb43639ccad6e4533ec798a026a004201613eb0b4122b1bbb3f9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7f2184140c298d88f1de4da49013475309ad1f88b477c6a086754fa7764dc5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13318808563c2924b4edd2b4e6dbfa122edfbebfd027b1583aa4d422089a88ec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023537b1d42d463489031d31c03e4c8fead35d65882bdcee8e6825976c4bcbf6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.010552639752789481,
5
  "eval_steps": 500,
6
- "global_step": 1325,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1591,6 +1591,42 @@
1591
  "reward_std": 0.26380954012274743,
1592
  "rewards/custom_reward_simplified_v7_dblog": 0.7125,
1593
  "step": 1320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
  }
1595
  ],
1596
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.01075174616321947,
5
  "eval_steps": 500,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1591
  "reward_std": 0.26380954012274743,
1592
  "rewards/custom_reward_simplified_v7_dblog": 0.7125,
1593
  "step": 1320
1594
+ },
1595
+ {
1596
+ "completion_length": 715.7,
1597
+ "epoch": 0.010592461034875478,
1598
+ "grad_norm": 0.16817767918109894,
1599
+ "kl": 0.013910629483871163,
1600
+ "learning_rate": 2.4636402558710434e-06,
1601
+ "loss": 0.0006,
1602
+ "reward": 0.759375,
1603
+ "reward_std": 0.2193169414997101,
1604
+ "rewards/custom_reward_simplified_v7_dblog": 0.759375,
1605
+ "step": 1330
1606
+ },
1607
+ {
1608
+ "completion_length": 655.90625,
1609
+ "epoch": 0.010672103599047475,
1610
+ "grad_norm": 0.2265154868364334,
1611
+ "kl": 0.00848452327772975,
1612
+ "learning_rate": 2.4272882031422216e-06,
1613
+ "loss": 0.0003,
1614
+ "reward": 0.78125,
1615
+ "reward_std": 0.3443989932537079,
1616
+ "rewards/custom_reward_simplified_v7_dblog": 0.78125,
1617
+ "step": 1340
1618
+ },
1619
+ {
1620
+ "completion_length": 660.075,
1621
+ "epoch": 0.01075174616321947,
1622
+ "grad_norm": 0.24644052982330322,
1623
+ "kl": 0.009867909434251487,
1624
+ "learning_rate": 2.3909515315866606e-06,
1625
+ "loss": 0.0004,
1626
+ "reward": 0.79375,
1627
+ "reward_std": 0.29604131579399107,
1628
+ "rewards/custom_reward_simplified_v7_dblog": 0.79375,
1629
+ "step": 1350
1630
  }
1631
  ],
1632
  "logging_steps": 10,