Training in progress, step 1125, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49b80ac9e71a99d4c4c6a019f92f74ac748ae8a3db5b19d23b29551127562576
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d151f506d17ce4e0c03a3713c88478bcbdda4a3e34a91b28b4bd8b989b401da7
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b954913df4d8f33fa5530a046cefed963703da18792e9717aec1ad27ac9b9ac
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:59f54450bc787ffff804bacadefa39d8e88180d2ae39b043d6f32742458a1a52
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29c2d81a2ee2dbcbac40eefdb89ca90568c2c93f1ddee20eba510e848b8988fb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:85c6ddcd387376a529404ae7199a4e5de6a96bca66488faa804a60e28809229c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa66b03381b0dfe975a2c18907018f054a16534d5b1412711280057eb4458970
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:11fc5977afef8cdfd86c063f7ec5cbc1d6cc536eaf4ad77613e346ad3657a2d8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00876068205891957,
   "eval_steps": 500,
-  "global_step": 1100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1327,6 +1327,30 @@
       "reward_std": 0.25932966247200967,
       "rewards/custom_reward_simplified_v7_dblog": 0.71875,
       "step": 1100
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.008959788469349559,
   "eval_steps": 500,
+  "global_step": 1125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.25932966247200967,
       "rewards/custom_reward_simplified_v7_dblog": 0.71875,
       "step": 1100
+    },
+    {
+      "completion_length": 700.48125,
+      "epoch": 0.008840324623091565,
+      "grad_norm": 0.28138336539268494,
+      "kl": 0.007267917576245964,
+      "learning_rate": 3.2517644987606827e-06,
+      "loss": 0.0003,
+      "reward": 0.9125,
+      "reward_std": 0.33715927675366403,
+      "rewards/custom_reward_simplified_v7_dblog": 0.9125,
+      "step": 1110
+    },
+    {
+      "completion_length": 662.26875,
+      "epoch": 0.008919967187263561,
+      "grad_norm": 0.1348627209663391,
+      "kl": 0.007481782068498433,
+      "learning_rate": 3.217008081777726e-06,
+      "loss": 0.0003,
+      "reward": 0.728125,
+      "reward_std": 0.2547163799405098,
+      "rewards/custom_reward_simplified_v7_dblog": 0.728125,
+      "step": 1120
     }
   ],
   "logging_steps": 10,