Training in progress, step 2, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:285e67517f237a9697081434580f857010d36aa013c6ff890622d9e480e2b094
 size 204312

 version https://git-lfs.github.com/spec/v1
+oid sha256:98b2cf74a2b5d4bd40902b401fc380e1b8ef6161013caaa3aa9168b0d06003a4
 size 204312

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f58d991949f026bfdaafad80eba73863d7e85b5f7867f1db8d6d8857934e5dc
 size 442938

 version https://git-lfs.github.com/spec/v1
+oid sha256:302f8b6e002ca4408d7ef6b3f2cf11f5aa55b25a6ac5e2d9987fa542e4fc5384
 size 442938

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f067b395d7810960b1ede296260d51dd059e8878d3d12b6992cd49b154dc5a2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d66fa99c562c6e3ce75a07c2582cf26da8b48f30d2f96295ec230f94991541d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:764cf599d0b83c5d302be0978d09e435a466a8511dc1f7e86d5ee3704818d46f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2972d8cacb091bd0e98bfff54b2e4933301753087ce149a9bf9a205a5e2d795
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9230029614245606e-05,
   "eval_steps": 500,
-  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24,6 +24,22 @@
       "rewards/reward_func_keywords": 0.0,
       "rewards/reward_func_length": 664.5,
       "step": 1
     }
   ],
   "logging_steps": 1,
@@ -38,7 +54,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.846005922849121e-05,
   "eval_steps": 500,
+  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "rewards/reward_func_keywords": 0.0,
       "rewards/reward_func_length": 664.5,
       "step": 1
+    },
+    {
+      "clip_ratio": 0.0,
+      "completion_length": 110.0,
+      "epoch": 3.846005922849121e-05,
+      "grad_norm": 0.4941328465938568,
+      "kl": 0.0,
+      "learning_rate": 2e-05,
+      "loss": -0.045,
+      "num_tokens": 592.0,
+      "reward": 404.5643005371094,
+      "reward_std": 7.8101983070373535,
+      "rewards/reward_func_conciseness": 1.0643115043640137,
+      "rewards/reward_func_keywords": 0.0,
+      "rewards/reward_func_length": 403.5,
+      "step": 2
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }