Training in progress, step 250, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d0b10032621567c53e0e12f4051e3fa7f9880f7b87ae153f1600c2eed98d364
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd787e707294bb8bff5a0916c3fc1454572d55e0dd5aa3f2c0e8c49ccd9c4af3
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4c7399a7124ebfc8f2602ee4160ffecfe9c2c5345d69e9697c401f6fd3bd73d
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6f84bfa1b6ca95587c76a1fd509c395f4674dc76fe078a46e0a319386062a54
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d71d011e87b512f28e794476e44bdcb409ab9a4721e9b4147120eeb12f1053d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a3313de4fd5d6c377ad3ad093a56c26fc951ef200a8e1b32181726369bbbd9a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47394e7e6639b2da14254e3e88e50a05cb1b1d15a05d1aa46398ae3b93c7909f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d875190fd38553ed31bf8899557c08005bdf782b4117087e46fd1724f637210
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0015928512834399217,
   "eval_steps": 500,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -247,6 +247,66 @@
       "reward_std": 0.09681975245475768,
       "rewards/custom_reward_logic_v2": 0.06411250084638595,
       "step": 200
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.001991064104299902,
   "eval_steps": 500,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.09681975245475768,
       "rewards/custom_reward_logic_v2": 0.06411250084638595,
       "step": 200
+    },
+    {
+      "completion_length": 19.3,
+      "epoch": 0.0016724938476119177,
+      "grad_norm": 0.03454764187335968,
+      "kl": 0.3337583176791668,
+      "learning_rate": 4.608478614532215e-06,
+      "loss": 0.0134,
+      "reward": 0.21312500052154065,
+      "reward_std": 0.1542310357093811,
+      "rewards/custom_reward_logic_v2": 0.21312500052154065,
+      "step": 210
+    },
+    {
+      "completion_length": 48.96875,
+      "epoch": 0.0017521364117839139,
+      "grad_norm": 0.8877259492874146,
+      "kl": 0.3230514988303185,
+      "learning_rate": 4.54788011072248e-06,
+      "loss": 0.0129,
+      "reward": -0.12147499993443489,
+      "reward_std": 0.4157312333583832,
+      "rewards/custom_reward_logic_v2": -0.12147499993443489,
+      "step": 220
+    },
+    {
+      "completion_length": 19.41875,
+      "epoch": 0.0018317789759559099,
+      "grad_norm": 0.7465932369232178,
+      "kl": 0.32680382803082464,
+      "learning_rate": 4.4833833507280884e-06,
+      "loss": 0.0131,
+      "reward": 0.14000000059604645,
+      "reward_std": 0.09731742069125175,
+      "rewards/custom_reward_logic_v2": 0.14000000059604645,
+      "step": 230
+    },
+    {
+      "completion_length": 23.425,
+      "epoch": 0.001911421540127906,
+      "grad_norm": 0.4111487567424774,
+      "kl": 0.3509559452533722,
+      "learning_rate": 4.415111107797445e-06,
+      "loss": 0.014,
+      "reward": 0.18286250159144402,
+      "reward_std": 0.1811980500817299,
+      "rewards/custom_reward_logic_v2": 0.18286250159144402,
+      "step": 240
+    },
+    {
+      "completion_length": 18.91875,
+      "epoch": 0.001991064104299902,
+      "grad_norm": 0.8882763385772705,
+      "kl": 0.3525215476751328,
+      "learning_rate": 4.34319334202531e-06,
+      "loss": 0.0141,
+      "reward": 0.17062499970197678,
+      "reward_std": 0.11504097878932953,
+      "rewards/custom_reward_logic_v2": 0.17062499970197678,
+      "step": 250
     }
   ],
   "logging_steps": 10,