Training in progress, step 650, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8552145847bfeab4636144433c43db62dcc91675e6d80bf8a61ef9290e9fc440
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7927585b38c284985305607dff1216956b2ec6766011237d98f764aa65d8a68
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eea82d271c17fd83a4656876cadcc33c1faa5ce883f21449769fbb145e1864a9
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad7ca2ba371f72eb8e0be449ebbd259143484bb427530b2555406e081648b613
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee66f0b7a716e07b51f02c7e7cb42ecc8627dd065518adcf55ce909d0ef818d8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7455eeaa206bf36d4b15096f6439d20efc5b6e48cc80079c496864649b7e925
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c12eb55eb955504de5fa1d8a599d32de65813670b5001ef30e45d7a6b99dbe1f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6841cf52b84fc804fe9e82d0922149bacf5c720074060a8df77995cba4f40aaa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.004778553850319765,
   "eval_steps": 500,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -727,6 +727,66 @@
       "reward_std": 0.1257291093468666,
       "rewards/custom_reward_logic_v2": 0.12624999806284903,
       "step": 600
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.005176766671179746,
   "eval_steps": 500,
+  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.1257291093468666,
       "rewards/custom_reward_logic_v2": 0.12624999806284903,
       "step": 600
+    },
+    {
+      "completion_length": 20.73125,
+      "epoch": 0.004858196414491761,
+      "grad_norm": 1.0636727809906006,
+      "kl": 0.32965768277645113,
+      "learning_rate": 8.110244809608494e-07,
+      "loss": 0.0132,
+      "reward": 0.1900000013411045,
+      "reward_std": 0.2276224449276924,
+      "rewards/custom_reward_logic_v2": 0.1900000013411045,
+      "step": 610
+    },
+    {
+      "completion_length": 21.09375,
+      "epoch": 0.004937838978663757,
+      "grad_norm": 0.6413007378578186,
+      "kl": 0.4171911731362343,
+      "learning_rate": 7.322330470336314e-07,
+      "loss": 0.0167,
+      "reward": 0.06624999977648258,
+      "reward_std": 0.08008950427174569,
+      "rewards/custom_reward_logic_v2": 0.06624999977648258,
+      "step": 620
+    },
+    {
+      "completion_length": 19.6125,
+      "epoch": 0.005017481542835753,
+      "grad_norm": 0.4128471612930298,
+      "kl": 0.3929149940609932,
+      "learning_rate": 6.568066579746901e-07,
+      "loss": 0.0157,
+      "reward": 0.1518750011920929,
+      "reward_std": 0.09802244454622269,
+      "rewards/custom_reward_logic_v2": 0.1518750011920929,
+      "step": 630
+    },
+    {
+      "completion_length": 18.51875,
+      "epoch": 0.00509712410700775,
+      "grad_norm": 1.2197966575622559,
+      "kl": 0.4328078910708427,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 0.0173,
+      "reward": 0.045000001043081286,
+      "reward_std": 0.08135274946689605,
+      "rewards/custom_reward_logic_v2": 0.045000001043081286,
+      "step": 640
+    },
+    {
+      "completion_length": 19.89375,
+      "epoch": 0.005176766671179746,
+      "grad_norm": 0.24087023735046387,
+      "kl": 0.39142851531505585,
+      "learning_rate": 5.166166492719124e-07,
+      "loss": 0.0157,
+      "reward": 0.0818750023841858,
+      "reward_std": 0.11013087928295136,
+      "rewards/custom_reward_logic_v2": 0.0818750023841858,
+      "step": 650
     }
   ],
   "logging_steps": 10,