smirki commited on
Commit
4358a64
·
verified ·
1 Parent(s): 3daa0ed

Training in progress, step 1850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0227de8dffd60e7bcbc361e28f5f14d86f6b8aa6b9faaa25078af2c1664371
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5394825a795e8c2ec586e2ccde14f9f1732bd83cb73a2e054c0fd16cab24bbf
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b50241b912450499aa67b6f47d8ef5d57cc918130f305986edc730a6c70d0be
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3baa6fc86afbc2ac24ca4ccb3c82a2a206a33c1941bd40fbf22e526547f85bc
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdc279ccf06d94b21f0f1142b3ba0467a4b037c890e7d4c8b4d0d9959c7a643b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffd2e290b62c75abc87ca6dd76b6027bd06ea1ad6cc5c7d19bce5cce9b4ccc3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e18d4bd19d02103826c6ccfe1e046ad882c768a3c57be1799d9b12107011c97
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c13a1a0518ee90714d59f79891d6fed6babf24ee82c1c888d7fca0cfbcc3ac
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.014534767961389285,
5
  "eval_steps": 500,
6
- "global_step": 1825,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2191,6 +2191,42 @@
2191
  "reward_std": 0.3459245666861534,
2192
  "rewards/custom_reward_simplified_v7_dblog": 0.925,
2193
  "step": 1820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2194
  }
2195
  ],
2196
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.014733874371819274,
5
  "eval_steps": 500,
6
+ "global_step": 1850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2191
  "reward_std": 0.3459245666861534,
2192
  "rewards/custom_reward_simplified_v7_dblog": 0.925,
2193
  "step": 1820
2194
+ },
2195
+ {
2196
+ "completion_length": 708.60625,
2197
+ "epoch": 0.014574589243475282,
2198
+ "grad_norm": 0.00766308419406414,
2199
+ "kl": 0.01772608202882111,
2200
+ "learning_rate": 8.110244809608494e-07,
2201
+ "loss": 0.0007,
2202
+ "reward": 0.73125,
2203
+ "reward_std": 0.2913930006325245,
2204
+ "rewards/custom_reward_simplified_v7_dblog": 0.73125,
2205
+ "step": 1830
2206
+ },
2207
+ {
2208
+ "completion_length": 660.0375,
2209
+ "epoch": 0.014654231807647279,
2210
+ "grad_norm": 0.20974037051200867,
2211
+ "kl": 0.014227323909290135,
2212
+ "learning_rate": 7.843959053281663e-07,
2213
+ "loss": 0.0006,
2214
+ "reward": 0.809375,
2215
+ "reward_std": 0.24926668480038644,
2216
+ "rewards/custom_reward_simplified_v7_dblog": 0.809375,
2217
+ "step": 1840
2218
+ },
2219
+ {
2220
+ "completion_length": 729.71875,
2221
+ "epoch": 0.014733874371819274,
2222
+ "grad_norm": 0.24099427461624146,
2223
+ "kl": 0.018935651518404484,
2224
+ "learning_rate": 7.581302419733633e-07,
2225
+ "loss": 0.0008,
2226
+ "reward": 0.690625,
2227
+ "reward_std": 0.32810748890042307,
2228
+ "rewards/custom_reward_simplified_v7_dblog": 0.690625,
2229
+ "step": 1850
2230
  }
2231
  ],
2232
  "logging_steps": 10,