Training in progress, step 1925, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:013e1a4a2e514b45c1477302c11e1065adcc3bbe1c09bc675409e2c5ae75c7df
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:400bf9743fcfff3f47e3f0b9a1cede38e8d6e96374ac2cd587f2d2edfd906572
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea3f86dcd6bd8021e80606e175ae4af07751c3370190d4d053658d8ae6e55f26
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:3309a30e849ace9608e1957368fc06f650c97cc91eaa6df6d4bf2f6b649868f3
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81580e1e88878dfebd843045199ab5edec2e92f7c132f2ea86bcce1ce7f5e2ef
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c65db11f2bbb866945208742b8cd4b8865acadf113d5d9fbfe55b269b5ff1059
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:875915e5d6f2e2f0e0a7a4a850b61c3c410ff67126d9daeb7cdaa64d28801ee5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:deeb8215b1ed4392892b832a6e768b3c4ae9ca65d4af274686a16e7d74532396
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015132087192679255,
   "eval_steps": 500,
-  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2287,6 +2287,30 @@
       "reward_std": 0.2382744610309601,
       "rewards/custom_reward_simplified_v7_dblog": 0.725,
       "step": 1900
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.015331193603109246,
   "eval_steps": 500,
+  "global_step": 1925,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.2382744610309601,
       "rewards/custom_reward_simplified_v7_dblog": 0.725,
       "step": 1900
+    },
+    {
+      "completion_length": 767.7375,
+      "epoch": 0.015211729756851252,
+      "grad_norm": 0.1330222189426422,
+      "kl": 0.02190765142440796,
+      "learning_rate": 6.084630428312679e-07,
+      "loss": 0.0009,
+      "reward": 0.66875,
+      "reward_std": 0.27546602860093117,
+      "rewards/custom_reward_simplified_v7_dblog": 0.66875,
+      "step": 1910
+    },
+    {
+      "completion_length": 726.63125,
+      "epoch": 0.015291372321023247,
+      "grad_norm": 0.21655875444412231,
+      "kl": 0.02581467442214489,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 0.001,
+      "reward": 0.834375,
+      "reward_std": 0.38373097851872445,
+      "rewards/custom_reward_simplified_v7_dblog": 0.834375,
+      "step": 1920
     }
   ],
   "logging_steps": 10,