Training in progress, step 735, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +34 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5556c42afd4cbcccc6b8c7daeeed304b98a9f69cff50dce9ba7b205f95dd41cd
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:bef26c2a2853a24f1b1c9805d18570a11582a968162d8ec559b2eb99caf6a905
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5729627a47d753fb4652198309b47c4ee8d996325418926805a63ba81123f555
 size 1292182139

 version https://git-lfs.github.com/spec/v1
+oid sha256:11d1a002f539fc5bc604de71a5a7d6f1fa4338b74515c7488f80fcd90cbd2908
 size 1292182139

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69507df2b8a5c1c7c0f2966d469e9dc7a76d78b5c8dd43ad70533a04eda7e6f9
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:91bfd02584ff4a4b7257c19902379baf05dddfa2f12bf1176cb250ae41bd5889
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f290ffa291ab5f169b101e10cc8f320158a20b2ff211b18ae56cf14b41a64b36
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:010a2386ada1bde03d03b30051618785b4ef8543babe46fbc7b87cf0823779a5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 100,
   "best_metric": 0.3715035319328308,
   "best_model_checkpoint": "qwen2.5-coder-7b-agentic-cot/checkpoint-100",
-  "epoch": 14.290155440414507,
   "eval_steps": 50,
-  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -862,6 +862,36 @@
       "eval_samples_per_second": 1.873,
       "eval_steps_per_second": 0.261,
       "step": 700
     }
   ],
   "logging_steps": 10,
@@ -876,12 +906,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.803058972717158e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 100,
   "best_metric": 0.3715035319328308,
   "best_model_checkpoint": "qwen2.5-coder-7b-agentic-cot/checkpoint-100",
+  "epoch": 15.0,
   "eval_steps": 50,
+  "global_step": 735,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.873,
       "eval_steps_per_second": 0.261,
       "step": 700
+    },
+    {
+      "entropy": 0.005753540876321494,
+      "epoch": 14.49740932642487,
+      "grad_norm": 0.01792309246957302,
+      "learning_rate": 6.839272934511143e-07,
+      "loss": 0.0027,
+      "mean_token_accuracy": 0.9992820754647255,
+      "num_tokens": 22815368.0,
+      "step": 710
+    },
+    {
+      "entropy": 0.005742728849872947,
+      "epoch": 14.704663212435234,
+      "grad_norm": 0.017534621059894562,
+      "learning_rate": 2.591857829770672e-07,
+      "loss": 0.0027,
+      "mean_token_accuracy": 0.9992718860507012,
+      "num_tokens": 23142299.0,
+      "step": 720
+    },
+    {
+      "entropy": 0.005674040759913623,
+      "epoch": 14.911917098445596,
+      "grad_norm": 0.016848629340529442,
+      "learning_rate": 3.6461540339682855e-08,
+      "loss": 0.0026,
+      "mean_token_accuracy": 0.9993263006210327,
+      "num_tokens": 23467258.0,
+      "step": 730
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.029005584744704e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null