smirki commited on
Commit
a1e7e77
·
verified ·
1 Parent(s): 1706ec3

Training in progress, step 2025, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d841bdf6b4728ba25ddfa075a6ecf7ffcd91ad64f151f5984cfdb0fb36616e2
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2c4957d38c92a35e2e3a9382278d93a60be6d048e22e2962b28edcfeb9f100
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b591aa40759fbc489d41cede5b6d509cefae3af7c306b30fa1e6a7a4b8ec4837
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981c9ca2b7a4413155c1784a3b128f562c7cb222b2b00cf2bcd73b66aba50336
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ffa2392d644ba2690b0835df4eac79e599506fd693b988dfb49d247c7e500c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6d264aea217f296bd9612ca82a53239d5b10338e8322f94ac8e49def93b492
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0119210f3709b267b1dcdc2165f2b55aac98c420d5275ee5428e502b1f632094
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675f1831a53f04b0e24989b6bc8d9a17d48e7e3a0e3b788250eb946e8a1ecf93
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.015928512834399215,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2407,6 +2407,30 @@
2407
  "reward_std": 0.23378355875611306,
2408
  "rewards/custom_reward_simplified_v7_dblog": 0.921875,
2409
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2410
  }
2411
  ],
2412
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.016127619244829205,
5
  "eval_steps": 500,
6
+ "global_step": 2025,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2407
  "reward_std": 0.23378355875611306,
2408
  "rewards/custom_reward_simplified_v7_dblog": 0.921875,
2409
  "step": 2000
2410
+ },
2411
+ {
2412
+ "completion_length": 710.65,
2413
+ "epoch": 0.016008155398571214,
2414
+ "grad_norm": 0.13598495721817017,
2415
+ "kl": 0.01561300114262849,
2416
+ "learning_rate": 3.915213854677863e-07,
2417
+ "loss": 0.0006,
2418
+ "reward": 0.859375,
2419
+ "reward_std": 0.22324086129665374,
2420
+ "rewards/custom_reward_simplified_v7_dblog": 0.859375,
2421
+ "step": 2010
2422
+ },
2423
+ {
2424
+ "completion_length": 600.3625,
2425
+ "epoch": 0.01608779796274321,
2426
+ "grad_norm": 0.33102965354919434,
2427
+ "kl": 0.01562973433174193,
2428
+ "learning_rate": 3.722083189075007e-07,
2429
+ "loss": 0.0006,
2430
+ "reward": 1.0125,
2431
+ "reward_std": 0.37898894101381303,
2432
+ "rewards/custom_reward_simplified_v7_dblog": 1.0125,
2433
+ "step": 2020
2434
  }
2435
  ],
2436
  "logging_steps": 10,