Training in progress, step 1275, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b01a64a72b5add0f6fd2fb87887bf51cc687c3d81596e93d4bc43a3262efdc8
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bad81dc96084ab5ddee74a4664e886906e4be5ca86b50b0a9fb72bc45570188
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:362d1e054b84b0d500e8438627b240c7e2097ee86b8e098e8f3c941c83016f24
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:732a3e23bab6a35c7c0b1fbd3eab25075735978514241e667277b9927b941f9d
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:945a25d4c45d543ac1804f21f5b94dcfff3f3d66f6934f364600a8816d57e5dc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0622740b7f529d67896bed5f8ff648e5729882e67681daaf8eb3ef3cf2b65e92
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1305a35e69c26a1911821f2a4173b0f97cfc01b88eb1cc64c99b91067e3e293
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fb41e5de709c0ea44e9bca507118fe887f61feeb314b07045c5c6637c0e7194
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00995532052149951,
   "eval_steps": 500,
-  "global_step": 1250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1507,6 +1507,30 @@
       "reward_std": 0.19232839569449425,
       "rewards/custom_reward_simplified_v7_dblog": 0.6625,
       "step": 1250
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0101544269319295,
   "eval_steps": 500,
+  "global_step": 1275,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.19232839569449425,
       "rewards/custom_reward_simplified_v7_dblog": 0.6625,
       "step": 1250
+    },
+    {
+      "completion_length": 615.5875,
+      "epoch": 0.010034963085671506,
+      "grad_norm": 0.14006367325782776,
+      "kl": 0.008278649020940065,
+      "learning_rate": 2.717889356869146e-06,
+      "loss": 0.0003,
+      "reward": 0.903125,
+      "reward_std": 0.3407335430383682,
+      "rewards/custom_reward_simplified_v7_dblog": 0.903125,
+      "step": 1260
+    },
+    {
+      "completion_length": 727.70625,
+      "epoch": 0.010114605649843502,
+      "grad_norm": 0.005724642425775528,
+      "kl": 0.009203878976404668,
+      "learning_rate": 2.681644926806527e-06,
+      "loss": 0.0004,
+      "reward": 0.60625,
+      "reward_std": 0.2156815566122532,
+      "rewards/custom_reward_simplified_v7_dblog": 0.60625,
+      "step": 1270
     }
   ],
   "logging_steps": 10,