Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +38 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f17872418aae0905dfae97ebb7aebf37822ab2eb996c26e42a3aaf949bfcd271
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4107159b4de1a0016c3037c95f8767db5293436cfc3872136bfe4ef2abab1c4
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6747204f02d0ca992a089068f22369910c018177bb254de015c0f24b1ce164a1
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7a2e6285b957e4b33a1874bb188455e0aee029f3189c5296dea9eb7f8a4b5e5
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24472d7ba56158cdbd89fd1876b65e3d33b864a75708f9c89471972ee7e27f65
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b1cd6f4b1f4bf6d48fec1a76cbc162cf62188f491054248af00ed2b0989265a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f85c03bc0baab845592b9a318e5d8ba23f2d327a8bd0c8d4182e735b0e052fc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4dd73233e298447d6740eca7860dc08eb78819edfe930a45b32b54e01d37739
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0001991064104299902,
   "eval_steps": 500,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -31,6 +31,42 @@
       "reward_std": 0.25719649270176886,
       "rewards/custom_reward_simplified_v7_dblog": 0.496875,
       "step": 20
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0003982128208599804,
   "eval_steps": 500,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.25719649270176886,
       "rewards/custom_reward_simplified_v7_dblog": 0.496875,
       "step": 20
+    },
+    {
+      "completion_length": 750.46875,
+      "epoch": 0.00023892769251598824,
+      "grad_norm": 0.15792745351791382,
+      "kl": 0.0007828957575839012,
+      "learning_rate": 6.25e-07,
+      "loss": 0.0,
+      "reward": 0.684375,
+      "reward_std": 0.3755971297621727,
+      "rewards/custom_reward_simplified_v7_dblog": 0.684375,
+      "step": 30
+    },
+    {
+      "completion_length": 813.94375,
+      "epoch": 0.00031857025668798435,
+      "grad_norm": 0.12503573298454285,
+      "kl": 0.0007155703555326909,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 0.0,
+      "reward": 0.565625,
+      "reward_std": 0.2761854581534863,
+      "rewards/custom_reward_simplified_v7_dblog": 0.565625,
+      "step": 40
+    },
+    {
+      "completion_length": 747.675,
+      "epoch": 0.0003982128208599804,
+      "grad_norm": 0.10329681634902954,
+      "kl": 0.0007686431898036971,
+      "learning_rate": 1.0416666666666667e-06,
+      "loss": 0.0,
+      "reward": 0.621875,
+      "reward_std": 0.30715219378471376,
+      "rewards/custom_reward_simplified_v7_dblog": 0.621875,
+      "step": 50
     }
   ],
   "logging_steps": 10,