Training in progress, step 750, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +38 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:005b3402f16489e2f67dfdc3d23e9b70931743c9bfd90fbe0b9df9c0fcaa7181
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:df9b8df9a3bcfa7793084f2c491e074f5bd3c4876f1de283980446d052bd9416
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87812fd1302b0602159fd01cdae3acd4150d67e06d7b2daaf6949356ea5d360e
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa436fad63bef13e1a414cbf3f838549d7499163af0e47cc9779ce02592bd406
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f14edd6888bcd521e87d23d882bc9d0658a0613355ce894e4f94a5b1eda5455
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee7c52c324bc53ae464edbc3c1cfa99789369f21c7d8544ae8559cb215adb050
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36288734d02808044e8b560e68ecd8751d3692a2ff33e2976d9cbec75c95c60f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:09a89f840a88cd313584cc217e93a9306c165d4938e58831fcfab48fea824fe7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005774085902469716,
   "eval_steps": 500,
-  "global_step": 725,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -871,6 +871,42 @@
       "reward_std": 0.25477964654564855,
       "rewards/custom_reward_simplified_v7_dblog": 0.634375,
       "step": 720
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.005973192312899706,
   "eval_steps": 500,
+  "global_step": 750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.25477964654564855,
       "rewards/custom_reward_simplified_v7_dblog": 0.634375,
       "step": 720
+    },
+    {
+      "completion_length": 607.86875,
+      "epoch": 0.005813907184555714,
+      "grad_norm": 0.20680995285511017,
+      "kl": 0.0055589195340871814,
+      "learning_rate": 4.391536957168733e-06,
+      "loss": 0.0002,
+      "reward": 0.8,
+      "reward_std": 0.32480863481760025,
+      "rewards/custom_reward_simplified_v7_dblog": 0.8,
+      "step": 730
+    },
+    {
+      "completion_length": 674.13125,
+      "epoch": 0.00589354974872771,
+      "grad_norm": 0.005594769027084112,
+      "kl": 0.005972519854549318,
+      "learning_rate": 4.367562678102491e-06,
+      "loss": 0.0002,
+      "reward": 0.665625,
+      "reward_std": 0.20820673778653145,
+      "rewards/custom_reward_simplified_v7_dblog": 0.665625,
+      "step": 740
+    },
+    {
+      "completion_length": 639.69375,
+      "epoch": 0.005973192312899706,
+      "grad_norm": 0.11012833565473557,
+      "kl": 0.005814655229914934,
+      "learning_rate": 4.34319334202531e-06,
+      "loss": 0.0002,
+      "reward": 0.796875,
+      "reward_std": 0.34761993661522866,
+      "rewards/custom_reward_simplified_v7_dblog": 0.796875,
+      "step": 750
     }
   ],
   "logging_steps": 10,