Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1701d1397a8f6bba03a038aad0ad88dddbe56212ed6ac753bf48dccf50090e24
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f0b9fe960d8b700432d64261f2e9fb8b5e2feb648ccba2b26954af39f14f187
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d5cbf4360926fd5a69ab224acee68af41a01edad72a7837d83695317fff4262
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:6980510b381af43d0d08ceb7a59264815242f1f6223f0a5de785069b7c5b74d7
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7882cf9d1800e045d97afc34ed2d790cd5f0da147adeb6824c51ec77a35e0c5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:63e7217b76d9787359cf45d83ba0b63cb6335c60810ebfe7324880fdde71d442
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47e7c3293120b0e1021fffede4430570f0c03435609ec93915f9f3961852aa6d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9d42f94a00d3d38a441f86dbb87d1da2ac6b6e6fceeb3fff0437ffb348f193a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0007964256417199608,
   "eval_steps": 500,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -127,6 +127,66 @@
       "reward_std": 1.2458222389221192,
       "rewards/custom_reward_logic_v2": -3.805912530422211,
       "step": 100
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0011946384625799412,
   "eval_steps": 500,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 1.2458222389221192,
       "rewards/custom_reward_logic_v2": -3.805912530422211,
       "step": 100
+    },
+    {
+      "completion_length": 729.04375,
+      "epoch": 0.0008760682058919569,
+      "grad_norm": 0.1617293506860733,
+      "kl": 0.0018967354553751647,
+      "learning_rate": 4.978612153434527e-06,
+      "loss": 0.0001,
+      "reward": -3.071175017207861,
+      "reward_std": 1.3524149775505065,
+      "rewards/custom_reward_logic_v2": -3.071175017207861,
+      "step": 110
+    },
+    {
+      "completion_length": 641.74375,
+      "epoch": 0.000955710770063953,
+      "grad_norm": 0.26608461141586304,
+      "kl": 0.0029119997401721776,
+      "learning_rate": 4.962019382530521e-06,
+      "loss": 0.0001,
+      "reward": -2.690687493979931,
+      "reward_std": 1.0754198133945465,
+      "rewards/custom_reward_logic_v2": -2.690687493979931,
+      "step": 120
+    },
+    {
+      "completion_length": 883.9875,
+      "epoch": 0.001035353334235949,
+      "grad_norm": 0.7612231373786926,
+      "kl": 0.003597881377208978,
+      "learning_rate": 4.9407400177998335e-06,
+      "loss": 0.0001,
+      "reward": -3.8035999715328215,
+      "reward_std": 1.2502110481262207,
+      "rewards/custom_reward_logic_v2": -3.8035999715328215,
+      "step": 130
+    },
+    {
+      "completion_length": 524.48125,
+      "epoch": 0.0011149958984079452,
+      "grad_norm": 0.9543402791023254,
+      "kl": 0.08978197913384064,
+      "learning_rate": 4.914814565722671e-06,
+      "loss": 0.0036,
+      "reward": -2.0596874909475447,
+      "reward_std": 1.3678732179105282,
+      "rewards/custom_reward_logic_v2": -2.0596874909475447,
+      "step": 140
+    },
+    {
+      "completion_length": 46.43125,
+      "epoch": 0.0011946384625799412,
+      "grad_norm": 0.7850804328918457,
+      "kl": 0.3018287725746632,
+      "learning_rate": 4.884292376870567e-06,
+      "loss": 0.0121,
+      "reward": -0.04024999849498272,
+      "reward_std": 0.430637900531292,
+      "rewards/custom_reward_logic_v2": -0.04024999849498272,
+      "step": 150
     }
   ],
   "logging_steps": 10,