Training in progress, step 700, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7927585b38c284985305607dff1216956b2ec6766011237d98f764aa65d8a68
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e023b69876a28967f68d2e2ca3f8be837b2c4981d6e3ad2049a01c6786eaa312
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad7ca2ba371f72eb8e0be449ebbd259143484bb427530b2555406e081648b613
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d213ee4b82282a82674cd9ce017085e7a7b3603c3dad380d9e3921fa872458b
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7455eeaa206bf36d4b15096f6439d20efc5b6e48cc80079c496864649b7e925
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:df73025cee4dddc3e485bae9a5251784ca346a6e5d55d7c1b83f877ae248859a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6841cf52b84fc804fe9e82d0922149bacf5c720074060a8df77995cba4f40aaa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b98b9203fd15f057816eda32716436bfe57e83707db8ff60acfc089588f9da41
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005176766671179746,
   "eval_steps": 500,
-  "global_step": 650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -787,6 +787,66 @@
       "reward_std": 0.11013087928295136,
       "rewards/custom_reward_logic_v2": 0.0818750023841858,
       "step": 650
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.005574979492039726,
   "eval_steps": 500,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.11013087928295136,
       "rewards/custom_reward_logic_v2": 0.0818750023841858,
       "step": 650
+    },
+    {
+      "completion_length": 19.23125,
+      "epoch": 0.005256409235351742,
+      "grad_norm": 1.1138290166854858,
+      "kl": 0.3609082795679569,
+      "learning_rate": 4.5211988927752026e-07,
+      "loss": 0.0144,
+      "reward": 0.13375000059604644,
+      "reward_std": 0.22290636524558066,
+      "rewards/custom_reward_logic_v2": 0.13375000059604644,
+      "step": 660
+    },
+    {
+      "completion_length": 21.1375,
+      "epoch": 0.005336051799523738,
+      "grad_norm": 0.7887033820152283,
+      "kl": 0.36398947462439535,
+      "learning_rate": 3.915213854677863e-07,
+      "loss": 0.0146,
+      "reward": 0.07750000171363354,
+      "reward_std": 0.09986742436885834,
+      "rewards/custom_reward_logic_v2": 0.07750000171363354,
+      "step": 670
+    },
+    {
+      "completion_length": 20.7375,
+      "epoch": 0.005415694363695734,
+      "grad_norm": 1.2118674516677856,
+      "kl": 0.34819948896765707,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 0.0139,
+      "reward": 0.14000000059604645,
+      "reward_std": 0.15659263283014296,
+      "rewards/custom_reward_logic_v2": 0.14000000059604645,
+      "step": 680
+    },
+    {
+      "completion_length": 25.03125,
+      "epoch": 0.00549533692786773,
+      "grad_norm": 0.6678434014320374,
+      "kl": 0.3506194405257702,
+      "learning_rate": 2.8247291705444575e-07,
+      "loss": 0.014,
+      "reward": 0.10087500289082527,
+      "reward_std": 0.19995234534144402,
+      "rewards/custom_reward_logic_v2": 0.10087500289082527,
+      "step": 690
+    },
+    {
+      "completion_length": 19.75,
+      "epoch": 0.005574979492039726,
+      "grad_norm": 1.028297781944275,
+      "kl": 0.33339232876896857,
+      "learning_rate": 2.3423053240837518e-07,
+      "loss": 0.0133,
+      "reward": 0.09312500022351741,
+      "reward_std": 0.09688087031245232,
+      "rewards/custom_reward_logic_v2": 0.09312500022351741,
+      "step": 700
     }
   ],
   "logging_steps": 10,