Training in progress, step 1825, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47a7f2a9e5514ded39fec42ccf85affb0ea83765d8cd5710f5ffae2ac93a7539
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce0227de8dffd60e7bcbc361e28f5f14d86f6b8aa6b9faaa25078af2c1664371
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad0f2461614af492d300b806eb46f3bbcfd9aac2534006fb03315af5654f03d1
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b50241b912450499aa67b6f47d8ef5d57cc918130f305986edc730a6c70d0be
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8238871797f984a007bcc429ff439a023be817586923e09bedcced1680e3b8e1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdc279ccf06d94b21f0f1142b3ba0467a4b037c890e7d4c8b4d0d9959c7a643b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5affcab53c6bf0c312245e4ec3117742a7ae09a65ad0d199a0c62a7385ad2300
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e18d4bd19d02103826c6ccfe1e046ad882c768a3c57be1799d9b12107011c97
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.014335661550959295,
   "eval_steps": 500,
-  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2167,6 +2167,30 @@
       "reward_std": 0.28967257887125014,
       "rewards/custom_reward_simplified_v7_dblog": 0.75,
       "step": 1800
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014534767961389285,
   "eval_steps": 500,
+  "global_step": 1825,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.28967257887125014,
       "rewards/custom_reward_simplified_v7_dblog": 0.75,
       "step": 1800
+    },
+    {
+      "completion_length": 689.26875,
+      "epoch": 0.01441530411513129,
+      "grad_norm": 0.17589329183101654,
+      "kl": 0.016255489736795425,
+      "learning_rate": 8.653477618573261e-07,
+      "loss": 0.0007,
+      "reward": 0.765625,
+      "reward_std": 0.3363394603133202,
+      "rewards/custom_reward_simplified_v7_dblog": 0.765625,
+      "step": 1810
+    },
+    {
+      "completion_length": 640.91875,
+      "epoch": 0.014494946679303287,
+      "grad_norm": 0.21075929701328278,
+      "kl": 0.015922663966193795,
+      "learning_rate": 8.380103359651554e-07,
+      "loss": 0.0006,
+      "reward": 0.925,
+      "reward_std": 0.3459245666861534,
+      "rewards/custom_reward_simplified_v7_dblog": 0.925,
+      "step": 1820
     }
   ],
   "logging_steps": 10,