smirki committed on
Commit
a0c8459
·
verified ·
1 Parent(s): 6af8aad

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab9a1c3e4f7eeb26f1787aab2bede14a97d3e21b68a19b5777a98b5c4dc9c594
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a656f61ae1ccd60f5456798788c8e6ded39b418346d6fe6071eb8ce468298a83
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb6746c7a22945701e7525b6e96db148acb5c526d8ec4de574c6cd87337c3b9
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf849eb44752b2c9a42282b0505698ef4c3181196d5a7e2c98af9389f0ba6ef6
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eed43572ea1df9b6a964b9089ff0a48779e2a1cc8929f8355fea19e7f7ab5c5f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3ccbab0f5afd279e81643827121534f4d8480da4449b3d948c3974c92f9bbd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:905cad78b215386b1078b951a642067b64baccf738f77304c45fea8d964d3906
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8241f030a944f045c9ceca4728f16fdc2a35629a475d8a3997b745440061990
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.002787489746019863,
5
  "eval_steps": 500,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,66 @@
427
  "reward_std": 0.15355074554681777,
428
  "rewards/custom_reward_logic_v2": 0.15562499798834323,
429
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0031857025668798433,
5
  "eval_steps": 500,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "reward_std": 0.15355074554681777,
428
  "rewards/custom_reward_logic_v2": 0.15562499798834323,
429
  "step": 350
430
+ },
431
+ {
432
+ "completion_length": 21.21875,
433
+ "epoch": 0.002867132310191859,
434
+ "grad_norm": 0.8673160672187805,
435
+ "kl": 0.328788036108017,
436
+ "learning_rate": 3.3550503583141726e-06,
437
+ "loss": 0.0132,
438
+ "reward": 0.22808750197291375,
439
+ "reward_std": 0.14038661643862724,
440
+ "rewards/custom_reward_logic_v2": 0.22808750197291375,
441
+ "step": 360
442
+ },
443
+ {
444
+ "completion_length": 18.29375,
445
+ "epoch": 0.002946774874363855,
446
+ "grad_norm": 1.275578260421753,
447
+ "kl": 0.3586613781750202,
448
+ "learning_rate": 3.2517644987606827e-06,
449
+ "loss": 0.0143,
450
+ "reward": 0.09437500052154064,
451
+ "reward_std": 0.13283729180693626,
452
+ "rewards/custom_reward_logic_v2": 0.09437500052154064,
453
+ "step": 370
454
+ },
455
+ {
456
+ "completion_length": 19.625,
457
+ "epoch": 0.0030264174385358513,
458
+ "grad_norm": 1.135249376296997,
459
+ "kl": 0.3399433046579361,
460
+ "learning_rate": 3.147047612756302e-06,
461
+ "loss": 0.0136,
462
+ "reward": 0.18000000156462193,
463
+ "reward_std": 0.1102687232196331,
464
+ "rewards/custom_reward_logic_v2": 0.18000000156462193,
465
+ "step": 380
466
+ },
467
+ {
468
+ "completion_length": 18.65625,
469
+ "epoch": 0.0031060600027078473,
470
+ "grad_norm": 0.0214656013995409,
471
+ "kl": 0.3453727260231972,
472
+ "learning_rate": 3.0410990348452572e-06,
473
+ "loss": 0.0138,
474
+ "reward": 0.14312500059604644,
475
+ "reward_std": 0.21185824573040007,
476
+ "rewards/custom_reward_logic_v2": 0.14312500059604644,
477
+ "step": 390
478
+ },
479
+ {
480
+ "completion_length": 22.5875,
481
+ "epoch": 0.0031857025668798433,
482
+ "grad_norm": 1.1392817497253418,
483
+ "kl": 0.3561431519687176,
484
+ "learning_rate": 2.9341204441673267e-06,
485
+ "loss": 0.0142,
486
+ "reward": 0.09312500022351741,
487
+ "reward_std": 0.09467698186635971,
488
+ "rewards/custom_reward_logic_v2": 0.09312500022351741,
489
+ "step": 400
490
  }
491
  ],
492
  "logging_steps": 10,