smirki commited on
Commit
b164877
·
verified ·
1 Parent(s): 1dfaa05

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61d5f3b3956507a0db89b2b255c80a675f44ff5e99a7e6ae7ea27a9dbae5245c
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554efc4b31a2b5b45666110a342ac5c45b243dfe12be5334b44fda80de8f5aa4
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:640381d5936ef49033582656f8fe75d216ab5ee94c287238fec5029f5c57db33
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8a2d96c7bf68d9800f4176d313fab93b3142d7c82237e72e2ae4d9f3e4cb61
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:841fc0dbc0dff2d8abfca3a999fe5c384dfee0019e02325f871bf3d7abbde657
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edea5f37f705ad565383833c9ad60f92237a837032a0c28939c687cad7d33003
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51e44ce6c7f1c4d261c4ea0ef93ba7bc8b23a59d005cedfd24704873d0d6bb0b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a96a106a18e32bbd701b0dcdc53e964437fa7e7a4942bad969ac65b7f0ae1f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.000995532052149951,
5
  "eval_steps": 500,
6
- "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -151,6 +151,42 @@
151
  "reward_std": 0.2573545627295971,
152
  "rewards/custom_reward_simplified_v7_dblog": 0.728125,
153
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  }
155
  ],
156
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0011946384625799412,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
151
  "reward_std": 0.2573545627295971,
152
  "rewards/custom_reward_simplified_v7_dblog": 0.728125,
153
  "step": 120
154
+ },
155
+ {
156
+ "completion_length": 872.6375,
157
+ "epoch": 0.001035353334235949,
158
+ "grad_norm": 0.11807532608509064,
159
+ "kl": 0.0007370044564595446,
160
+ "learning_rate": 2.7083333333333334e-06,
161
+ "loss": 0.0,
162
+ "reward": 0.45,
163
+ "reward_std": 0.24368184804916382,
164
+ "rewards/custom_reward_simplified_v7_dblog": 0.45,
165
+ "step": 130
166
+ },
167
+ {
168
+ "completion_length": 780.325,
169
+ "epoch": 0.0011149958984079452,
170
+ "grad_norm": 0.21067936718463898,
171
+ "kl": 0.0007969280297402293,
172
+ "learning_rate": 2.916666666666667e-06,
173
+ "loss": 0.0,
174
+ "reward": 0.671875,
175
+ "reward_std": 0.3312204420566559,
176
+ "rewards/custom_reward_simplified_v7_dblog": 0.671875,
177
+ "step": 140
178
+ },
179
+ {
180
+ "completion_length": 796.15625,
181
+ "epoch": 0.0011946384625799412,
182
+ "grad_norm": 0.11178277432918549,
183
+ "kl": 0.0007584215141832829,
184
+ "learning_rate": 3.125e-06,
185
+ "loss": 0.0,
186
+ "reward": 0.675,
187
+ "reward_std": 0.2411833107471466,
188
+ "rewards/custom_reward_simplified_v7_dblog": 0.675,
189
+ "step": 150
190
  }
191
  ],
192
  "logging_steps": 10,