smirki committed on
Commit
e412ea7
·
verified ·
1 Parent(s): bd9e1b4

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc93f9835831dfe4fe8f3fabe09b29fc36f37216ea398f53ad427fb270ac8fe5
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97bd2071c863777af98b0e0e6835dfbd00b846cf114dd8805232a1e59165eb4d
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96b7c7d4975d19f24910799848311416fe9ce4376edbc7f5c1ce181682b8608b
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b07f59c35dc209b4c9a5c3ea67c4ff82d683bff7ee192361d73fa058b15b139
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3fefcac352ac4ee619c7ff01acc8acab65caca37da4199453b9a9441a77444
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84da873207bf99b33af0e8b6206521d9a66d95873145146735310919d6f6590
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9d2f48ad4251bd6b293d199cd40a6a67cdcf63f7330a6380c30e1422c7eaee0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e33b54a8302eb57494695935487eaaa98a78a3203f913dcc4eaf73f18edd66c0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.011747278215369422,
5
  "eval_steps": 500,
6
- "global_step": 1475,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1771,6 +1771,42 @@
1771
  "reward_std": 0.21832374781370162,
1772
  "rewards/custom_reward_simplified_v7_dblog": 0.64375,
1773
  "step": 1470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1774
  }
1775
  ],
1776
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.011946384625799411,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1771
  "reward_std": 0.21832374781370162,
1772
  "rewards/custom_reward_simplified_v7_dblog": 0.64375,
1773
  "step": 1470
1774
+ },
1775
+ {
1776
+ "completion_length": 722.06875,
1777
+ "epoch": 0.01178709949745542,
1778
+ "grad_norm": 0.13200955092906952,
1779
+ "kl": 0.013854384049773216,
1780
+ "learning_rate": 1.9234603231439e-06,
1781
+ "loss": 0.0006,
1782
+ "reward": 0.790625,
1783
+ "reward_std": 0.2784456007182598,
1784
+ "rewards/custom_reward_simplified_v7_dblog": 0.790625,
1785
+ "step": 1480
1786
+ },
1787
+ {
1788
+ "completion_length": 664.46875,
1789
+ "epoch": 0.011866742061627416,
1790
+ "grad_norm": 0.14230677485466003,
1791
+ "kl": 0.012553655169904232,
1792
+ "learning_rate": 1.8881416401141905e-06,
1793
+ "loss": 0.0005,
1794
+ "reward": 0.9,
1795
+ "reward_std": 0.23252918049693108,
1796
+ "rewards/custom_reward_simplified_v7_dblog": 0.9,
1797
+ "step": 1490
1798
+ },
1799
+ {
1800
+ "completion_length": 653.79375,
1801
+ "epoch": 0.011946384625799411,
1802
+ "grad_norm": 0.17014774680137634,
1803
+ "kl": 0.01346926314290613,
1804
+ "learning_rate": 1.852952387243698e-06,
1805
+ "loss": 0.0005,
1806
+ "reward": 0.740625,
1807
+ "reward_std": 0.22115055918693544,
1808
+ "rewards/custom_reward_simplified_v7_dblog": 0.740625,
1809
+ "step": 1500
1810
  }
1811
  ],
1812
  "logging_steps": 10,