youssefedweqd commited on
Commit
873c7f9
·
verified ·
1 Parent(s): 41f749c

Training in progress, step 12500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff94a08cfbfadc81987308cc7d47af94ac8ce7e11fb190f1f75ef59c20e3b15
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7f04d35541d77dbff70445b4364fcae91e44972bf3ab7e69103b284501ed21
3
  size 161533160
last-checkpoint/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.14.0
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.15.1
last-checkpoint/adapter_config.json CHANGED
@@ -3,6 +3,7 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
@@ -23,15 +24,16 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_proj",
27
- "q_proj",
28
- "o_proj",
29
  "v_proj",
 
 
30
  "up_proj",
 
31
  "k_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
6
+ "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
 
27
  "v_proj",
28
+ "q_proj",
29
+ "down_proj",
30
  "up_proj",
31
+ "o_proj",
32
  "k_proj",
33
+ "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff94a08cfbfadc81987308cc7d47af94ac8ce7e11fb190f1f75ef59c20e3b15
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32da8fce78ed226e77f431e33fe7e0f42f4f018a69f74c5398b0e2ccf1a881cb
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c3d349cf5e65bb928953b78c47d5759352643d122fa1d227da5e8e6650f25ad
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1c53b527b35aefa1821c75ad0db088c15c37748c02fe81987a79a2f577ba98e
3
  size 323292202
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7249c5158b8459ddcca143f0f417ff8eb82437a3eefc6bd1ba816e8c2260b9b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37464a7417d63637852b1fff3ec37914c73ef86362dc59ab1c9aae5b091fe4e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.052980132450331,
6
  "eval_steps": 100,
7
- "global_step": 12400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1712,70 +1712,6 @@
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
1715
- },
1716
- {
1717
- "epoch": 1.9950331125827816,
1718
- "grad_norm": 1.2669893503189087,
1719
- "learning_rate": 3.0470536893814385e-05,
1720
- "loss": 0.3633,
1721
- "step": 12050
1722
- },
1723
- {
1724
- "epoch": 2.0,
1725
- "eval_loss": 0.38875386118888855,
1726
- "eval_runtime": 1884.7691,
1727
- "eval_samples_per_second": 3.205,
1728
- "eval_steps_per_second": 3.205,
1729
- "step": 12080
1730
- },
1731
- {
1732
- "epoch": 2.0033112582781456,
1733
- "grad_norm": 1.1019831895828247,
1734
- "learning_rate": 3.0028102791373535e-05,
1735
- "loss": 0.3221,
1736
- "step": 12100
1737
- },
1738
- {
1739
- "epoch": 2.01158940397351,
1740
- "grad_norm": 1.095613956451416,
1741
- "learning_rate": 2.9587521598880573e-05,
1742
- "loss": 0.281,
1743
- "step": 12150
1744
- },
1745
- {
1746
- "epoch": 2.019867549668874,
1747
- "grad_norm": 1.1338448524475098,
1748
- "learning_rate": 2.914883419163475e-05,
1749
- "loss": 0.2824,
1750
- "step": 12200
1751
- },
1752
- {
1753
- "epoch": 2.0281456953642385,
1754
- "grad_norm": 1.1247187852859497,
1755
- "learning_rate": 2.871208126923771e-05,
1756
- "loss": 0.2986,
1757
- "step": 12250
1758
- },
1759
- {
1760
- "epoch": 2.0364238410596025,
1761
- "grad_norm": 1.0773439407348633,
1762
- "learning_rate": 2.827730335181765e-05,
1763
- "loss": 0.2852,
1764
- "step": 12300
1765
- },
1766
- {
1767
- "epoch": 2.044701986754967,
1768
- "grad_norm": 1.343947410583496,
1769
- "learning_rate": 2.7844540776269924e-05,
1770
- "loss": 0.2905,
1771
- "step": 12350
1772
- },
1773
- {
1774
- "epoch": 2.052980132450331,
1775
- "grad_norm": 0.9737703800201416,
1776
- "learning_rate": 2.7413833692514844e-05,
1777
- "loss": 0.2791,
1778
- "step": 12400
1779
  }
1780
  ],
1781
  "logging_steps": 50,
@@ -1795,7 +1731,7 @@
1795
  "attributes": {}
1796
  }
1797
  },
1798
- "total_flos": 9.725295003697152e+16,
1799
  "train_batch_size": 1,
1800
  "trial_name": null,
1801
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9867549668874172,
6
  "eval_steps": 100,
7
+ "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1715
  }
1716
  ],
1717
  "logging_steps": 50,
 
1731
  "attributes": {}
1732
  }
1733
  },
1734
+ "total_flos": 9.411055686411878e+16,
1735
  "train_batch_size": 1,
1736
  "trial_name": null,
1737
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d239bd5c15d3a2ea118383dc904ef4e598d66c2b3b3c72f570384f15cfc1fc71
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a8006cb98bf44b0d7f4a8b1fd3538cf90707ba1f704653af70e738a604df06b
3
  size 5752
trainer_log.jsonl CHANGED
@@ -7,3 +7,5 @@
7
  {"current_steps": 12300, "total_steps": 18120, "loss": 0.2852, "lr": 2.827730335181765e-05, "epoch": 2.0364238410596025, "percentage": 67.88, "elapsed_time": "0:50:32", "remaining_time": "0:23:54"}
8
  {"current_steps": 12350, "total_steps": 18120, "loss": 0.2905, "lr": 2.7844540776269924e-05, "epoch": 2.044701986754967, "percentage": 68.16, "elapsed_time": "0:53:57", "remaining_time": "0:25:12"}
9
  {"current_steps": 12400, "total_steps": 18120, "loss": 0.2791, "lr": 2.7413833692514844e-05, "epoch": 2.052980132450331, "percentage": 68.43, "elapsed_time": "0:57:06", "remaining_time": "0:26:20"}
 
 
 
7
  {"current_steps": 12300, "total_steps": 18120, "loss": 0.2852, "lr": 2.827730335181765e-05, "epoch": 2.0364238410596025, "percentage": 67.88, "elapsed_time": "0:50:32", "remaining_time": "0:23:54"}
8
  {"current_steps": 12350, "total_steps": 18120, "loss": 0.2905, "lr": 2.7844540776269924e-05, "epoch": 2.044701986754967, "percentage": 68.16, "elapsed_time": "0:53:57", "remaining_time": "0:25:12"}
9
  {"current_steps": 12400, "total_steps": 18120, "loss": 0.2791, "lr": 2.7413833692514844e-05, "epoch": 2.052980132450331, "percentage": 68.43, "elapsed_time": "0:57:06", "remaining_time": "0:26:20"}
10
+ {"current_steps": 12450, "total_steps": 18120, "loss": 0.2887, "lr": 2.698522205977273e-05, "epoch": 2.0612582781456954, "percentage": 68.71, "elapsed_time": "1:00:18", "remaining_time": "0:27:27"}
11
+ {"current_steps": 12500, "total_steps": 18120, "loss": 0.2845, "lr": 2.655874564285656e-05, "epoch": 2.0695364238410594, "percentage": 68.98, "elapsed_time": "1:03:33", "remaining_time": "0:28:34"}