smirki commited on
Commit
6c118f5
·
verified ·
1 Parent(s): 4d6fddd

Training in progress, step 1275, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b01a64a72b5add0f6fd2fb87887bf51cc687c3d81596e93d4bc43a3262efdc8
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bad81dc96084ab5ddee74a4664e886906e4be5ca86b50b0a9fb72bc45570188
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:362d1e054b84b0d500e8438627b240c7e2097ee86b8e098e8f3c941c83016f24
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732a3e23bab6a35c7c0b1fbd3eab25075735978514241e667277b9927b941f9d
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945a25d4c45d543ac1804f21f5b94dcfff3f3d66f6934f364600a8816d57e5dc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0622740b7f529d67896bed5f8ff648e5729882e67681daaf8eb3ef3cf2b65e92
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1305a35e69c26a1911821f2a4173b0f97cfc01b88eb1cc64c99b91067e3e293
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fb41e5de709c0ea44e9bca507118fe887f61feeb314b07045c5c6637c0e7194
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00995532052149951,
5
  "eval_steps": 500,
6
- "global_step": 1250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1507,6 +1507,30 @@
1507
  "reward_std": 0.19232839569449425,
1508
  "rewards/custom_reward_simplified_v7_dblog": 0.6625,
1509
  "step": 1250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1510
  }
1511
  ],
1512
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0101544269319295,
5
  "eval_steps": 500,
6
+ "global_step": 1275,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1507
  "reward_std": 0.19232839569449425,
1508
  "rewards/custom_reward_simplified_v7_dblog": 0.6625,
1509
  "step": 1250
1510
+ },
1511
+ {
1512
+ "completion_length": 615.5875,
1513
+ "epoch": 0.010034963085671506,
1514
+ "grad_norm": 0.14006367325782776,
1515
+ "kl": 0.008278649020940065,
1516
+ "learning_rate": 2.717889356869146e-06,
1517
+ "loss": 0.0003,
1518
+ "reward": 0.903125,
1519
+ "reward_std": 0.3407335430383682,
1520
+ "rewards/custom_reward_simplified_v7_dblog": 0.903125,
1521
+ "step": 1260
1522
+ },
1523
+ {
1524
+ "completion_length": 727.70625,
1525
+ "epoch": 0.010114605649843502,
1526
+ "grad_norm": 0.005724642425775528,
1527
+ "kl": 0.009203878976404668,
1528
+ "learning_rate": 2.681644926806527e-06,
1529
+ "loss": 0.0004,
1530
+ "reward": 0.60625,
1531
+ "reward_std": 0.2156815566122532,
1532
+ "rewards/custom_reward_simplified_v7_dblog": 0.60625,
1533
+ "step": 1270
1534
  }
1535
  ],
1536
  "logging_steps": 10,