smirki commited on
Commit
d9ae8b7
·
verified ·
1 Parent(s): 8b17aa2

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0b10032621567c53e0e12f4051e3fa7f9880f7b87ae153f1600c2eed98d364
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd787e707294bb8bff5a0916c3fc1454572d55e0dd5aa3f2c0e8c49ccd9c4af3
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4c7399a7124ebfc8f2602ee4160ffecfe9c2c5345d69e9697c401f6fd3bd73d
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f84bfa1b6ca95587c76a1fd509c395f4674dc76fe078a46e0a319386062a54
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d71d011e87b512f28e794476e44bdcb409ab9a4721e9b4147120eeb12f1053d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3313de4fd5d6c377ad3ad093a56c26fc951ef200a8e1b32181726369bbbd9a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47394e7e6639b2da14254e3e88e50a05cb1b1d15a05d1aa46398ae3b93c7909f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d875190fd38553ed31bf8899557c08005bdf782b4117087e46fd1724f637210
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0015928512834399217,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -247,6 +247,66 @@
247
  "reward_std": 0.09681975245475768,
248
  "rewards/custom_reward_logic_v2": 0.06411250084638595,
249
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  }
251
  ],
252
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.001991064104299902,
5
  "eval_steps": 500,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
247
  "reward_std": 0.09681975245475768,
248
  "rewards/custom_reward_logic_v2": 0.06411250084638595,
249
  "step": 200
250
+ },
251
+ {
252
+ "completion_length": 19.3,
253
+ "epoch": 0.0016724938476119177,
254
+ "grad_norm": 0.03454764187335968,
255
+ "kl": 0.3337583176791668,
256
+ "learning_rate": 4.608478614532215e-06,
257
+ "loss": 0.0134,
258
+ "reward": 0.21312500052154065,
259
+ "reward_std": 0.1542310357093811,
260
+ "rewards/custom_reward_logic_v2": 0.21312500052154065,
261
+ "step": 210
262
+ },
263
+ {
264
+ "completion_length": 48.96875,
265
+ "epoch": 0.0017521364117839139,
266
+ "grad_norm": 0.8877259492874146,
267
+ "kl": 0.3230514988303185,
268
+ "learning_rate": 4.54788011072248e-06,
269
+ "loss": 0.0129,
270
+ "reward": -0.12147499993443489,
271
+ "reward_std": 0.4157312333583832,
272
+ "rewards/custom_reward_logic_v2": -0.12147499993443489,
273
+ "step": 220
274
+ },
275
+ {
276
+ "completion_length": 19.41875,
277
+ "epoch": 0.0018317789759559099,
278
+ "grad_norm": 0.7465932369232178,
279
+ "kl": 0.32680382803082464,
280
+ "learning_rate": 4.4833833507280884e-06,
281
+ "loss": 0.0131,
282
+ "reward": 0.14000000059604645,
283
+ "reward_std": 0.09731742069125175,
284
+ "rewards/custom_reward_logic_v2": 0.14000000059604645,
285
+ "step": 230
286
+ },
287
+ {
288
+ "completion_length": 23.425,
289
+ "epoch": 0.001911421540127906,
290
+ "grad_norm": 0.4111487567424774,
291
+ "kl": 0.3509559452533722,
292
+ "learning_rate": 4.415111107797445e-06,
293
+ "loss": 0.014,
294
+ "reward": 0.18286250159144402,
295
+ "reward_std": 0.1811980500817299,
296
+ "rewards/custom_reward_logic_v2": 0.18286250159144402,
297
+ "step": 240
298
+ },
299
+ {
300
+ "completion_length": 18.91875,
301
+ "epoch": 0.001991064104299902,
302
+ "grad_norm": 0.8882763385772705,
303
+ "kl": 0.3525215476751328,
304
+ "learning_rate": 4.34319334202531e-06,
305
+ "loss": 0.0141,
306
+ "reward": 0.17062499970197678,
307
+ "reward_std": 0.11504097878932953,
308
+ "rewards/custom_reward_logic_v2": 0.17062499970197678,
309
+ "step": 250
310
  }
311
  ],
312
  "logging_steps": 10,