Training in progress, step 725, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5a04e84830bbd6a9d15a7f1b7837c35b2c5aa3c3d810fb936a39fabd501f732
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:005b3402f16489e2f67dfdc3d23e9b70931743c9bfd90fbe0b9df9c0fcaa7181
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9d953155e50b0ab53fcf21d7698c3548b16bde8faffe38265b83b4c176691fe
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:87812fd1302b0602159fd01cdae3acd4150d67e06d7b2daaf6949356ea5d360e
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d69b3ec032c894f09c29bf70e7979ef1f260d871795cce9f40bc39aed3eb516f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f14edd6888bcd521e87d23d882bc9d0658a0613355ce894e4f94a5b1eda5455
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29fe942acb075168b131d317a2f6faee5c51c7e00aad3609dac6f7c5e3261669
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:36288734d02808044e8b560e68ecd8751d3692a2ff33e2976d9cbec75c95c60f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005574979492039726,
   "eval_steps": 500,
-  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -847,6 +847,30 @@
       "reward_std": 0.34091843143105505,
       "rewards/custom_reward_simplified_v7_dblog": 0.80625,
       "step": 700
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.005774085902469716,
   "eval_steps": 500,
+  "global_step": 725,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.34091843143105505,
       "rewards/custom_reward_simplified_v7_dblog": 0.80625,
       "step": 700
+    },
+    {
+      "completion_length": 725.675,
+      "epoch": 0.005654622056211722,
+      "grad_norm": 0.18473494052886963,
+      "kl": 0.005652935197576881,
+      "learning_rate": 4.438280143203665e-06,
+      "loss": 0.0002,
+      "reward": 0.66875,
+      "reward_std": 0.216452856361866,
+      "rewards/custom_reward_simplified_v7_dblog": 0.66875,
+      "step": 710
+    },
+    {
+      "completion_length": 764.26875,
+      "epoch": 0.005734264620383718,
+      "grad_norm": 0.17735017836093903,
+      "kl": 0.005824547982774675,
+      "learning_rate": 4.415111107797445e-06,
+      "loss": 0.0002,
+      "reward": 0.634375,
+      "reward_std": 0.25477964654564855,
+      "rewards/custom_reward_simplified_v7_dblog": 0.634375,
+      "step": 720
     }
   ],
   "logging_steps": 10,