Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +38 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d5f3b3956507a0db89b2b255c80a675f44ff5e99a7e6ae7ea27a9dbae5245c
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:554efc4b31a2b5b45666110a342ac5c45b243dfe12be5334b44fda80de8f5aa4
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:640381d5936ef49033582656f8fe75d216ab5ee94c287238fec5029f5c57db33
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc8a2d96c7bf68d9800f4176d313fab93b3142d7c82237e72e2ae4d9f3e4cb61
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:841fc0dbc0dff2d8abfca3a999fe5c384dfee0019e02325f871bf3d7abbde657
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:edea5f37f705ad565383833c9ad60f92237a837032a0c28939c687cad7d33003
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51e44ce6c7f1c4d261c4ea0ef93ba7bc8b23a59d005cedfd24704873d0d6bb0b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:30a96a106a18e32bbd701b0dcdc53e964437fa7e7a4942bad969ac65b7f0ae1f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.000995532052149951,
   "eval_steps": 500,
-  "global_step": 125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -151,6 +151,42 @@
       "reward_std": 0.2573545627295971,
       "rewards/custom_reward_simplified_v7_dblog": 0.728125,
       "step": 120
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0011946384625799412,
   "eval_steps": 500,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.2573545627295971,
       "rewards/custom_reward_simplified_v7_dblog": 0.728125,
       "step": 120
+    },
+    {
+      "completion_length": 872.6375,
+      "epoch": 0.001035353334235949,
+      "grad_norm": 0.11807532608509064,
+      "kl": 0.0007370044564595446,
+      "learning_rate": 2.7083333333333334e-06,
+      "loss": 0.0,
+      "reward": 0.45,
+      "reward_std": 0.24368184804916382,
+      "rewards/custom_reward_simplified_v7_dblog": 0.45,
+      "step": 130
+    },
+    {
+      "completion_length": 780.325,
+      "epoch": 0.0011149958984079452,
+      "grad_norm": 0.21067936718463898,
+      "kl": 0.0007969280297402293,
+      "learning_rate": 2.916666666666667e-06,
+      "loss": 0.0,
+      "reward": 0.671875,
+      "reward_std": 0.3312204420566559,
+      "rewards/custom_reward_simplified_v7_dblog": 0.671875,
+      "step": 140
+    },
+    {
+      "completion_length": 796.15625,
+      "epoch": 0.0011946384625799412,
+      "grad_norm": 0.11178277432918549,
+      "kl": 0.0007584215141832829,
+      "learning_rate": 3.125e-06,
+      "loss": 0.0,
+      "reward": 0.675,
+      "reward_std": 0.2411833107471466,
+      "rewards/custom_reward_simplified_v7_dblog": 0.675,
+      "step": 150
     }
   ],
   "logging_steps": 10,