Training in progress, step 12500

Browse files

Files changed (10) hide show

adapter_model.safetensors +1 -1
last-checkpoint/README.md +1 -1
last-checkpoint/adapter_config.json +6 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +3 -67
last-checkpoint/training_args.bin +1 -1
trainer_log.jsonl +2 -0

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bff94a08cfbfadc81987308cc7d47af94ac8ce7e11fb190f1f75ef59c20e3b15
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d7f04d35541d77dbff70445b4364fcae91e44972bf3ab7e69103b284501ed21
 size 161533160

last-checkpoint/README.md CHANGED Viewed

@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
-- PEFT 0.14.0

 [More Information Needed]
 ### Framework versions
+- PEFT 0.15.1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -3,6 +3,7 @@
   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -23,15 +24,16 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "q_proj",
-    "o_proj",
     "v_proj",
     "up_proj",
     "k_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

   "auto_mapping": null,
   "base_model_name_or_path": "Qwen/Qwen3-0.6B",
   "bias": "none",
+  "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "q_proj",
+    "down_proj",
     "up_proj",
+    "o_proj",
     "k_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bff94a08cfbfadc81987308cc7d47af94ac8ce7e11fb190f1f75ef59c20e3b15
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:32da8fce78ed226e77f431e33fe7e0f42f4f018a69f74c5398b0e2ccf1a881cb
 size 161533160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c3d349cf5e65bb928953b78c47d5759352643d122fa1d227da5e8e6650f25ad
 size 323292202

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1c53b527b35aefa1821c75ad0db088c15c37748c02fe81987a79a2f577ba98e
 size 323292202

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7249c5158b8459ddcca143f0f417ff8eb82437a3eefc6bd1ba816e8c2260b9b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a37464a7417d63637852b1fff3ec37914c73ef86362dc59ab1c9aae5b091fe4e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.052980132450331,
   "eval_steps": 100,
-  "global_step": 12400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1712,70 +1712,6 @@
       "learning_rate": 3.091478285899862e-05,
       "loss": 0.3612,
       "step": 12000
-    },
-    {
-      "epoch": 1.9950331125827816,
-      "grad_norm": 1.2669893503189087,
-      "learning_rate": 3.0470536893814385e-05,
-      "loss": 0.3633,
-      "step": 12050
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.38875386118888855,
-      "eval_runtime": 1884.7691,
-      "eval_samples_per_second": 3.205,
-      "eval_steps_per_second": 3.205,
-      "step": 12080
-    },
-    {
-      "epoch": 2.0033112582781456,
-      "grad_norm": 1.1019831895828247,
-      "learning_rate": 3.0028102791373535e-05,
-      "loss": 0.3221,
-      "step": 12100
-    },
-    {
-      "epoch": 2.01158940397351,
-      "grad_norm": 1.095613956451416,
-      "learning_rate": 2.9587521598880573e-05,
-      "loss": 0.281,
-      "step": 12150
-    },
-    {
-      "epoch": 2.019867549668874,
-      "grad_norm": 1.1338448524475098,
-      "learning_rate": 2.914883419163475e-05,
-      "loss": 0.2824,
-      "step": 12200
-    },
-    {
-      "epoch": 2.0281456953642385,
-      "grad_norm": 1.1247187852859497,
-      "learning_rate": 2.871208126923771e-05,
-      "loss": 0.2986,
-      "step": 12250
-    },
-    {
-      "epoch": 2.0364238410596025,
-      "grad_norm": 1.0773439407348633,
-      "learning_rate": 2.827730335181765e-05,
-      "loss": 0.2852,
-      "step": 12300
-    },
-    {
-      "epoch": 2.044701986754967,
-      "grad_norm": 1.343947410583496,
-      "learning_rate": 2.7844540776269924e-05,
-      "loss": 0.2905,
-      "step": 12350
-    },
-    {
-      "epoch": 2.052980132450331,
-      "grad_norm": 0.9737703800201416,
-      "learning_rate": 2.7413833692514844e-05,
-      "loss": 0.2791,
-      "step": 12400
     }
   ],
   "logging_steps": 50,
@@ -1795,7 +1731,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.725295003697152e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9867549668874172,
   "eval_steps": 100,
+  "global_step": 12000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.091478285899862e-05,
       "loss": 0.3612,
       "step": 12000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 9.411055686411878e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d239bd5c15d3a2ea118383dc904ef4e598d66c2b3b3c72f570384f15cfc1fc71
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a8006cb98bf44b0d7f4a8b1fd3538cf90707ba1f704653af70e738a604df06b
 size 5752

trainer_log.jsonl CHANGED Viewed

@@ -7,3 +7,5 @@
 {"current_steps": 12300, "total_steps": 18120, "loss": 0.2852, "lr": 2.827730335181765e-05, "epoch": 2.0364238410596025, "percentage": 67.88, "elapsed_time": "0:50:32", "remaining_time": "0:23:54"}
 {"current_steps": 12350, "total_steps": 18120, "loss": 0.2905, "lr": 2.7844540776269924e-05, "epoch": 2.044701986754967, "percentage": 68.16, "elapsed_time": "0:53:57", "remaining_time": "0:25:12"}
 {"current_steps": 12400, "total_steps": 18120, "loss": 0.2791, "lr": 2.7413833692514844e-05, "epoch": 2.052980132450331, "percentage": 68.43, "elapsed_time": "0:57:06", "remaining_time": "0:26:20"}

 {"current_steps": 12300, "total_steps": 18120, "loss": 0.2852, "lr": 2.827730335181765e-05, "epoch": 2.0364238410596025, "percentage": 67.88, "elapsed_time": "0:50:32", "remaining_time": "0:23:54"}
 {"current_steps": 12350, "total_steps": 18120, "loss": 0.2905, "lr": 2.7844540776269924e-05, "epoch": 2.044701986754967, "percentage": 68.16, "elapsed_time": "0:53:57", "remaining_time": "0:25:12"}
 {"current_steps": 12400, "total_steps": 18120, "loss": 0.2791, "lr": 2.7413833692514844e-05, "epoch": 2.052980132450331, "percentage": 68.43, "elapsed_time": "0:57:06", "remaining_time": "0:26:20"}
+{"current_steps": 12450, "total_steps": 18120, "loss": 0.2887, "lr": 2.698522205977273e-05, "epoch": 2.0612582781456954, "percentage": 68.71, "elapsed_time": "1:00:18", "remaining_time": "0:27:27"}
+{"current_steps": 12500, "total_steps": 18120, "loss": 0.2845, "lr": 2.655874564285656e-05, "epoch": 2.0695364238410594, "percentage": 68.98, "elapsed_time": "1:03:33", "remaining_time": "0:28:34"}