smirki commited on
Commit
6c67425
·
verified ·
1 Parent(s): 5c7cf94

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc3cf965cb3e46bce1a07e647d9e64ded38cef1306287e6c24c592e3bafdafa6
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9a1c3e4f7eeb26f1787aab2bede14a97d3e21b68a19b5777a98b5c4dc9c594
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99f35c97132fdb9c45702a3a626946b943e4f0e9d6e04d821cf969f555bfc36f
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb6746c7a22945701e7525b6e96db148acb5c526d8ec4de574c6cd87337c3b9
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc64f06f07f11b5133abb444d47a3661de90bef33673b0253120e1e16093534
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eed43572ea1df9b6a964b9089ff0a48779e2a1cc8929f8355fea19e7f7ab5c5f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e96b3a3c408ef67987cac348d29150759cd1e3152271b07b879ff530abc69a0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:905cad78b215386b1078b951a642067b64baccf738f77304c45fea8d964d3906
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0023892769251598824,
5
  "eval_steps": 500,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -367,6 +367,66 @@
367
  "reward_std": 0.07851103022694587,
368
  "rewards/custom_reward_logic_v2": 0.10625000111758709,
369
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  }
371
  ],
372
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.002787489746019863,
5
  "eval_steps": 500,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
367
  "reward_std": 0.07851103022694587,
368
  "rewards/custom_reward_logic_v2": 0.10625000111758709,
369
  "step": 300
370
+ },
371
+ {
372
+ "completion_length": 28.39375,
373
+ "epoch": 0.0024689194893318784,
374
+ "grad_norm": 1.2737127542495728,
375
+ "kl": 0.3259002223610878,
376
+ "learning_rate": 3.8432490208670605e-06,
377
+ "loss": 0.013,
378
+ "reward": 0.07012500055134296,
379
+ "reward_std": 0.21550666987895967,
380
+ "rewards/custom_reward_logic_v2": 0.07012500055134296,
381
+ "step": 310
382
+ },
383
+ {
384
+ "completion_length": 20.49375,
385
+ "epoch": 0.002548562053503875,
386
+ "grad_norm": 1.3667010068893433,
387
+ "kl": 0.32961594611406325,
388
+ "learning_rate": 3.7500000000000005e-06,
389
+ "loss": 0.0132,
390
+ "reward": 0.15562500059604645,
391
+ "reward_std": 0.14379026368260384,
392
+ "rewards/custom_reward_logic_v2": 0.15562500059604645,
393
+ "step": 320
394
+ },
395
+ {
396
+ "completion_length": 23.7625,
397
+ "epoch": 0.002628204617675871,
398
+ "grad_norm": 0.9662195444107056,
399
+ "kl": 0.3291011206805706,
400
+ "learning_rate": 3.654371533087586e-06,
401
+ "loss": 0.0132,
402
+ "reward": 0.20617500003427267,
403
+ "reward_std": 0.12530190348625184,
404
+ "rewards/custom_reward_logic_v2": 0.20617500003427267,
405
+ "step": 330
406
+ },
407
+ {
408
+ "completion_length": 19.15,
409
+ "epoch": 0.002707847181847867,
410
+ "grad_norm": 2.964785099029541,
411
+ "kl": 0.3629206448793411,
412
+ "learning_rate": 3.556545654351749e-06,
413
+ "loss": 0.0145,
414
+ "reward": 0.10437500067055225,
415
+ "reward_std": 0.12071752324700355,
416
+ "rewards/custom_reward_logic_v2": 0.10437500067055225,
417
+ "step": 340
418
+ },
419
+ {
420
+ "completion_length": 20.4875,
421
+ "epoch": 0.002787489746019863,
422
+ "grad_norm": 1.0044533014297485,
423
+ "kl": 0.3254102662205696,
424
+ "learning_rate": 3.4567085809127247e-06,
425
+ "loss": 0.013,
426
+ "reward": 0.15562499798834323,
427
+ "reward_std": 0.15355074554681777,
428
+ "rewards/custom_reward_logic_v2": 0.15562499798834323,
429
+ "step": 350
430
  }
431
  ],
432
  "logging_steps": 10,