Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab9a1c3e4f7eeb26f1787aab2bede14a97d3e21b68a19b5777a98b5c4dc9c594
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a656f61ae1ccd60f5456798788c8e6ded39b418346d6fe6071eb8ce468298a83
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecb6746c7a22945701e7525b6e96db148acb5c526d8ec4de574c6cd87337c3b9
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf849eb44752b2c9a42282b0505698ef4c3181196d5a7e2c98af9389f0ba6ef6
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eed43572ea1df9b6a964b9089ff0a48779e2a1cc8929f8355fea19e7f7ab5c5f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd3ccbab0f5afd279e81643827121534f4d8480da4449b3d948c3974c92f9bbd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:905cad78b215386b1078b951a642067b64baccf738f77304c45fea8d964d3906
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8241f030a944f045c9ceca4728f16fdc2a35629a475d8a3997b745440061990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.002787489746019863,
   "eval_steps": 500,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -427,6 +427,66 @@
       "reward_std": 0.15355074554681777,
       "rewards/custom_reward_logic_v2": 0.15562499798834323,
       "step": 350
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0031857025668798433,
   "eval_steps": 500,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.15355074554681777,
       "rewards/custom_reward_logic_v2": 0.15562499798834323,
       "step": 350
+    },
+    {
+      "completion_length": 21.21875,
+      "epoch": 0.002867132310191859,
+      "grad_norm": 0.8673160672187805,
+      "kl": 0.328788036108017,
+      "learning_rate": 3.3550503583141726e-06,
+      "loss": 0.0132,
+      "reward": 0.22808750197291375,
+      "reward_std": 0.14038661643862724,
+      "rewards/custom_reward_logic_v2": 0.22808750197291375,
+      "step": 360
+    },
+    {
+      "completion_length": 18.29375,
+      "epoch": 0.002946774874363855,
+      "grad_norm": 1.275578260421753,
+      "kl": 0.3586613781750202,
+      "learning_rate": 3.2517644987606827e-06,
+      "loss": 0.0143,
+      "reward": 0.09437500052154064,
+      "reward_std": 0.13283729180693626,
+      "rewards/custom_reward_logic_v2": 0.09437500052154064,
+      "step": 370
+    },
+    {
+      "completion_length": 19.625,
+      "epoch": 0.0030264174385358513,
+      "grad_norm": 1.135249376296997,
+      "kl": 0.3399433046579361,
+      "learning_rate": 3.147047612756302e-06,
+      "loss": 0.0136,
+      "reward": 0.18000000156462193,
+      "reward_std": 0.1102687232196331,
+      "rewards/custom_reward_logic_v2": 0.18000000156462193,
+      "step": 380
+    },
+    {
+      "completion_length": 18.65625,
+      "epoch": 0.0031060600027078473,
+      "grad_norm": 0.0214656013995409,
+      "kl": 0.3453727260231972,
+      "learning_rate": 3.0410990348452572e-06,
+      "loss": 0.0138,
+      "reward": 0.14312500059604644,
+      "reward_std": 0.21185824573040007,
+      "rewards/custom_reward_logic_v2": 0.14312500059604644,
+      "step": 390
+    },
+    {
+      "completion_length": 22.5875,
+      "epoch": 0.0031857025668798433,
+      "grad_norm": 1.1392817497253418,
+      "kl": 0.3561431519687176,
+      "learning_rate": 2.9341204441673267e-06,
+      "loss": 0.0142,
+      "reward": 0.09312500022351741,
+      "reward_std": 0.09467698186635971,
+      "rewards/custom_reward_logic_v2": 0.09312500022351741,
+      "step": 400
     }
   ],
   "logging_steps": 10,