Training in progress, step 350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc3cf965cb3e46bce1a07e647d9e64ded38cef1306287e6c24c592e3bafdafa6
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab9a1c3e4f7eeb26f1787aab2bede14a97d3e21b68a19b5777a98b5c4dc9c594
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99f35c97132fdb9c45702a3a626946b943e4f0e9d6e04d821cf969f555bfc36f
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:ecb6746c7a22945701e7525b6e96db148acb5c526d8ec4de574c6cd87337c3b9
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dc64f06f07f11b5133abb444d47a3661de90bef33673b0253120e1e16093534
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eed43572ea1df9b6a964b9089ff0a48779e2a1cc8929f8355fea19e7f7ab5c5f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e96b3a3c408ef67987cac348d29150759cd1e3152271b07b879ff530abc69a0f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:905cad78b215386b1078b951a642067b64baccf738f77304c45fea8d964d3906
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0023892769251598824,
   "eval_steps": 500,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -367,6 +367,66 @@
       "reward_std": 0.07851103022694587,
       "rewards/custom_reward_logic_v2": 0.10625000111758709,
       "step": 300
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.002787489746019863,
   "eval_steps": 500,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.07851103022694587,
       "rewards/custom_reward_logic_v2": 0.10625000111758709,
       "step": 300
+    },
+    {
+      "completion_length": 28.39375,
+      "epoch": 0.0024689194893318784,
+      "grad_norm": 1.2737127542495728,
+      "kl": 0.3259002223610878,
+      "learning_rate": 3.8432490208670605e-06,
+      "loss": 0.013,
+      "reward": 0.07012500055134296,
+      "reward_std": 0.21550666987895967,
+      "rewards/custom_reward_logic_v2": 0.07012500055134296,
+      "step": 310
+    },
+    {
+      "completion_length": 20.49375,
+      "epoch": 0.002548562053503875,
+      "grad_norm": 1.3667010068893433,
+      "kl": 0.32961594611406325,
+      "learning_rate": 3.7500000000000005e-06,
+      "loss": 0.0132,
+      "reward": 0.15562500059604645,
+      "reward_std": 0.14379026368260384,
+      "rewards/custom_reward_logic_v2": 0.15562500059604645,
+      "step": 320
+    },
+    {
+      "completion_length": 23.7625,
+      "epoch": 0.002628204617675871,
+      "grad_norm": 0.9662195444107056,
+      "kl": 0.3291011206805706,
+      "learning_rate": 3.654371533087586e-06,
+      "loss": 0.0132,
+      "reward": 0.20617500003427267,
+      "reward_std": 0.12530190348625184,
+      "rewards/custom_reward_logic_v2": 0.20617500003427267,
+      "step": 330
+    },
+    {
+      "completion_length": 19.15,
+      "epoch": 0.002707847181847867,
+      "grad_norm": 2.964785099029541,
+      "kl": 0.3629206448793411,
+      "learning_rate": 3.556545654351749e-06,
+      "loss": 0.0145,
+      "reward": 0.10437500067055225,
+      "reward_std": 0.12071752324700355,
+      "rewards/custom_reward_logic_v2": 0.10437500067055225,
+      "step": 340
+    },
+    {
+      "completion_length": 20.4875,
+      "epoch": 0.002787489746019863,
+      "grad_norm": 1.0044533014297485,
+      "kl": 0.3254102662205696,
+      "learning_rate": 3.4567085809127247e-06,
+      "loss": 0.013,
+      "reward": 0.15562499798834323,
+      "reward_std": 0.15355074554681777,
+      "rewards/custom_reward_logic_v2": 0.15562499798834323,
+      "step": 350
     }
   ],
   "logging_steps": 10,