Training in progress, step 2025, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d841bdf6b4728ba25ddfa075a6ecf7ffcd91ad64f151f5984cfdb0fb36616e2
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee2c4957d38c92a35e2e3a9382278d93a60be6d048e22e2962b28edcfeb9f100
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b591aa40759fbc489d41cede5b6d509cefae3af7c306b30fa1e6a7a4b8ec4837
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:981c9ca2b7a4413155c1784a3b128f562c7cb222b2b00cf2bcd73b66aba50336
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9ffa2392d644ba2690b0835df4eac79e599506fd693b988dfb49d247c7e500c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e6d264aea217f296bd9612ca82a53239d5b10338e8322f94ac8e49def93b492
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0119210f3709b267b1dcdc2165f2b55aac98c420d5275ee5428e502b1f632094
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:675f1831a53f04b0e24989b6bc8d9a17d48e7e3a0e3b788250eb946e8a1ecf93
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015928512834399215,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2407,6 +2407,30 @@
       "reward_std": 0.23378355875611306,
       "rewards/custom_reward_simplified_v7_dblog": 0.921875,
       "step": 2000
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.016127619244829205,
   "eval_steps": 500,
+  "global_step": 2025,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.23378355875611306,
       "rewards/custom_reward_simplified_v7_dblog": 0.921875,
       "step": 2000
+    },
+    {
+      "completion_length": 710.65,
+      "epoch": 0.016008155398571214,
+      "grad_norm": 0.13598495721817017,
+      "kl": 0.01561300114262849,
+      "learning_rate": 3.915213854677863e-07,
+      "loss": 0.0006,
+      "reward": 0.859375,
+      "reward_std": 0.22324086129665374,
+      "rewards/custom_reward_simplified_v7_dblog": 0.859375,
+      "step": 2010
+    },
+    {
+      "completion_length": 600.3625,
+      "epoch": 0.01608779796274321,
+      "grad_norm": 0.33102965354919434,
+      "kl": 0.01562973433174193,
+      "learning_rate": 3.722083189075007e-07,
+      "loss": 0.0006,
+      "reward": 1.0125,
+      "reward_std": 0.37898894101381303,
+      "rewards/custom_reward_simplified_v7_dblog": 1.0125,
+      "step": 2020
     }
   ],
   "logging_steps": 10,