youssefedweqd commited on
Commit
d649966
·
verified ·
1 Parent(s): 873c7f9

Training in progress, step 12500, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.15.1
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.14.0
last-checkpoint/adapter_config.json CHANGED
@@ -3,7 +3,6 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
6
- "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -24,16 +23,15 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
- "v_proj",
28
  "q_proj",
29
- "down_proj",
30
- "up_proj",
31
  "o_proj",
 
 
32
  "k_proj",
33
- "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
- "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "gate_proj",
27
  "q_proj",
 
 
28
  "o_proj",
29
+ "v_proj",
30
+ "up_proj",
31
  "k_proj",
32
+ "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32da8fce78ed226e77f431e33fe7e0f42f4f018a69f74c5398b0e2ccf1a881cb
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7f04d35541d77dbff70445b4364fcae91e44972bf3ab7e69103b284501ed21
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1c53b527b35aefa1821c75ad0db088c15c37748c02fe81987a79a2f577ba98e
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6720327a16ca420330d6f497ab14ca9acdf5db39b74ab21d7f60d03a7128395
3
  size 323292202
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a37464a7417d63637852b1fff3ec37914c73ef86362dc59ab1c9aae5b091fe4e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d53f362dfc0208a4dc0c5cfeee8ed64963b9b9023210b87aa4ab5b42abcfa0e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9867549668874172,
6
  "eval_steps": 100,
7
- "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1712,6 +1712,84 @@
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1715
  }
1716
  ],
1717
  "logging_steps": 50,
@@ -1731,7 +1809,7 @@
1731
  "attributes": {}
1732
  }
1733
  },
1734
- "total_flos": 9.411055686411878e+16,
1735
  "train_batch_size": 1,
1736
  "trial_name": null,
1737
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0695364238410594,
6
  "eval_steps": 100,
7
+ "global_step": 12500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
1715
+ },
1716
+ {
1717
+ "epoch": 1.9950331125827816,
1718
+ "grad_norm": 1.2669893503189087,
1719
+ "learning_rate": 3.0470536893814385e-05,
1720
+ "loss": 0.3633,
1721
+ "step": 12050
1722
+ },
1723
+ {
1724
+ "epoch": 2.0,
1725
+ "eval_loss": 0.38875386118888855,
1726
+ "eval_runtime": 1884.7691,
1727
+ "eval_samples_per_second": 3.205,
1728
+ "eval_steps_per_second": 3.205,
1729
+ "step": 12080
1730
+ },
1731
+ {
1732
+ "epoch": 2.0033112582781456,
1733
+ "grad_norm": 1.1019831895828247,
1734
+ "learning_rate": 3.0028102791373535e-05,
1735
+ "loss": 0.3221,
1736
+ "step": 12100
1737
+ },
1738
+ {
1739
+ "epoch": 2.01158940397351,
1740
+ "grad_norm": 1.095613956451416,
1741
+ "learning_rate": 2.9587521598880573e-05,
1742
+ "loss": 0.281,
1743
+ "step": 12150
1744
+ },
1745
+ {
1746
+ "epoch": 2.019867549668874,
1747
+ "grad_norm": 1.1338448524475098,
1748
+ "learning_rate": 2.914883419163475e-05,
1749
+ "loss": 0.2824,
1750
+ "step": 12200
1751
+ },
1752
+ {
1753
+ "epoch": 2.0281456953642385,
1754
+ "grad_norm": 1.1247187852859497,
1755
+ "learning_rate": 2.871208126923771e-05,
1756
+ "loss": 0.2986,
1757
+ "step": 12250
1758
+ },
1759
+ {
1760
+ "epoch": 2.0364238410596025,
1761
+ "grad_norm": 1.0773439407348633,
1762
+ "learning_rate": 2.827730335181765e-05,
1763
+ "loss": 0.2852,
1764
+ "step": 12300
1765
+ },
1766
+ {
1767
+ "epoch": 2.044701986754967,
1768
+ "grad_norm": 1.343947410583496,
1769
+ "learning_rate": 2.7844540776269924e-05,
1770
+ "loss": 0.2905,
1771
+ "step": 12350
1772
+ },
1773
+ {
1774
+ "epoch": 2.052980132450331,
1775
+ "grad_norm": 0.9737703800201416,
1776
+ "learning_rate": 2.7413833692514844e-05,
1777
+ "loss": 0.2791,
1778
+ "step": 12400
1779
+ },
1780
+ {
1781
+ "epoch": 2.0612582781456954,
1782
+ "grad_norm": 1.0275969505310059,
1783
+ "learning_rate": 2.698522205977273e-05,
1784
+ "loss": 0.2887,
1785
+ "step": 12450
1786
+ },
1787
+ {
1788
+ "epoch": 2.0695364238410594,
1789
+ "grad_norm": 1.1896377801895142,
1790
+ "learning_rate": 2.655874564285656e-05,
1791
+ "loss": 0.2845,
1792
+ "step": 12500
1793
  }
1794
  ],
1795
  "logging_steps": 50,
 
1809
  "attributes": {}
1810
  }
1811
  },
1812
+ "total_flos": 9.804845550875443e+16,
1813
  "train_batch_size": 1,
1814
  "trial_name": null,
1815
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a8006cb98bf44b0d7f4a8b1fd3538cf90707ba1f704653af70e738a604df06b
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d239bd5c15d3a2ea118383dc904ef4e598d66c2b3b3c72f570384f15cfc1fc71
3
  size 5752