Training in progress, step 12500, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/README.md +1 -1
last-checkpoint/adapter_config.json +4 -6
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/README.md CHANGED Viewed

@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
-- PEFT 0.15.1

 [More Information Needed]
 ### Framework versions
+- PEFT 0.14.0

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -3,7 +3,6 @@
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
-  "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -24,16 +23,15 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "q_proj",
-    "down_proj",
-    "up_proj",
     "o_proj",
     "k_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
-  "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
     "q_proj",
     "o_proj",
+    "v_proj",
+    "up_proj",
     "k_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32da8fce78ed226e77f431e33fe7e0f42f4f018a69f74c5398b0e2ccf1a881cb
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d7f04d35541d77dbff70445b4364fcae91e44972bf3ab7e69103b284501ed21
 size 161533160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1c53b527b35aefa1821c75ad0db088c15c37748c02fe81987a79a2f577ba98e
 size 323292202

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6720327a16ca420330d6f497ab14ca9acdf5db39b74ab21d7f60d03a7128395
 size 323292202

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a37464a7417d63637852b1fff3ec37914c73ef86362dc59ab1c9aae5b091fe4e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d53f362dfc0208a4dc0c5cfeee8ed64963b9b9023210b87aa4ab5b42abcfa0e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9867549668874172,
   "eval_steps": 100,
-  "global_step": 12000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1712,6 +1712,84 @@
       "learning_rate": 3.091478285899862e-05,
       "loss": 0.3612,
       "step": 12000
     }
   ],
   "logging_steps": 50,
@@ -1731,7 +1809,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.411055686411878e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.0695364238410594,
   "eval_steps": 100,
+  "global_step": 12500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.091478285899862e-05,
       "loss": 0.3612,
       "step": 12000
+    },
+    {
+      "epoch": 1.9950331125827816,
+      "grad_norm": 1.2669893503189087,
+      "learning_rate": 3.0470536893814385e-05,
+      "loss": 0.3633,
+      "step": 12050
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.38875386118888855,
+      "eval_runtime": 1884.7691,
+      "eval_samples_per_second": 3.205,
+      "eval_steps_per_second": 3.205,
+      "step": 12080
+    },
+    {
+      "epoch": 2.0033112582781456,
+      "grad_norm": 1.1019831895828247,
+      "learning_rate": 3.0028102791373535e-05,
+      "loss": 0.3221,
+      "step": 12100
+    },
+    {
+      "epoch": 2.01158940397351,
+      "grad_norm": 1.095613956451416,
+      "learning_rate": 2.9587521598880573e-05,
+      "loss": 0.281,
+      "step": 12150
+    },
+    {
+      "epoch": 2.019867549668874,
+      "grad_norm": 1.1338448524475098,
+      "learning_rate": 2.914883419163475e-05,
+      "loss": 0.2824,
+      "step": 12200
+    },
+    {
+      "epoch": 2.0281456953642385,
+      "grad_norm": 1.1247187852859497,
+      "learning_rate": 2.871208126923771e-05,
+      "loss": 0.2986,
+      "step": 12250
+    },
+    {
+      "epoch": 2.0364238410596025,
+      "grad_norm": 1.0773439407348633,
+      "learning_rate": 2.827730335181765e-05,
+      "loss": 0.2852,
+      "step": 12300
+    },
+    {
+      "epoch": 2.044701986754967,
+      "grad_norm": 1.343947410583496,
+      "learning_rate": 2.7844540776269924e-05,
+      "loss": 0.2905,
+      "step": 12350
+    },
+    {
+      "epoch": 2.052980132450331,
+      "grad_norm": 0.9737703800201416,
+      "learning_rate": 2.7413833692514844e-05,
+      "loss": 0.2791,
+      "step": 12400
+    },
+    {
+      "epoch": 2.0612582781456954,
+      "grad_norm": 1.0275969505310059,
+      "learning_rate": 2.698522205977273e-05,
+      "loss": 0.2887,
+      "step": 12450
+    },
+    {
+      "epoch": 2.0695364238410594,
+      "grad_norm": 1.1896377801895142,
+      "learning_rate": 2.655874564285656e-05,
+      "loss": 0.2845,
+      "step": 12500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 9.804845550875443e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a8006cb98bf44b0d7f4a8b1fd3538cf90707ba1f704653af70e738a604df06b
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:d239bd5c15d3a2ea118383dc904ef4e598d66c2b3b3c72f570384f15cfc1fc71
 size 5752