Training in progress, step 1950, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +38 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:400bf9743fcfff3f47e3f0b9a1cede38e8d6e96374ac2cd587f2d2edfd906572
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3982e7c096a70a0347821728b97811479223f1df6099204e37383837d91f6f5
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3309a30e849ace9608e1957368fc06f650c97cc91eaa6df6d4bf2f6b649868f3
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cc874fd946b4a641da998c3cb0bfa07fd14122b6f28011d2891746400a37ff9
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c65db11f2bbb866945208742b8cd4b8865acadf113d5d9fbfe55b269b5ff1059
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a81ead0fc04d8776eae671d9a86963a7d3e1d3d1b066678dcfa501494dc5b51a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:deeb8215b1ed4392892b832a6e768b3c4ae9ca65d4af274686a16e7d74532396
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:91dcb5b2d000f105f4d6d1ae7170d8fafaf43f44a4f442aeb38a35f6c7a13bc9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015331193603109246,
   "eval_steps": 500,
-  "global_step": 1925,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2311,6 +2311,42 @@
       "reward_std": 0.38373097851872445,
       "rewards/custom_reward_simplified_v7_dblog": 0.834375,
       "step": 1920
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.015530300013539236,
   "eval_steps": 500,
+  "global_step": 1950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.38373097851872445,
       "rewards/custom_reward_simplified_v7_dblog": 0.834375,
       "step": 1920
+    },
+    {
+      "completion_length": 688.56875,
+      "epoch": 0.015371014885195244,
+      "grad_norm": 0.22155120968818665,
+      "kl": 0.025313653564080597,
+      "learning_rate": 5.617198567963353e-07,
+      "loss": 0.001,
+      "reward": 0.64375,
+      "reward_std": 0.2539114162325859,
+      "rewards/custom_reward_simplified_v7_dblog": 0.64375,
+      "step": 1930
+    },
+    {
+      "completion_length": 676.9125,
+      "epoch": 0.01545065744936724,
+      "grad_norm": 0.2373446673154831,
+      "kl": 0.018907574540935456,
+      "learning_rate": 5.389608377010608e-07,
+      "loss": 0.0008,
+      "reward": 0.821875,
+      "reward_std": 0.1906539335846901,
+      "rewards/custom_reward_simplified_v7_dblog": 0.821875,
+      "step": 1940
+    },
+    {
+      "completion_length": 640.675,
+      "epoch": 0.015530300013539236,
+      "grad_norm": 0.1865774542093277,
+      "kl": 0.014899229886941612,
+      "learning_rate": 5.166166492719124e-07,
+      "loss": 0.0006,
+      "reward": 0.725,
+      "reward_std": 0.2747412838041782,
+      "rewards/custom_reward_simplified_v7_dblog": 0.725,
+      "step": 1950
     }
   ],
   "logging_steps": 10,