youssefedweqd commited on
Commit
41f749c
·
verified ·
1 Parent(s): b6b258d

Training in progress, step 12400, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -199,4 +199,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
- - PEFT 0.15.1
 
199
  [More Information Needed]
200
  ### Framework versions
201
 
202
+ - PEFT 0.14.0
last-checkpoint/adapter_config.json CHANGED
@@ -3,7 +3,6 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
6
- "corda_config": null,
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -24,16 +23,15 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
- "v_proj",
28
  "q_proj",
29
- "down_proj",
30
- "up_proj",
31
  "o_proj",
 
 
32
  "k_proj",
33
- "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
- "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "Qwen/Qwen3-0.6B",
5
  "bias": "none",
 
6
  "eva_config": null,
7
  "exclude_modules": null,
8
  "fan_in_fan_out": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "gate_proj",
27
  "q_proj",
 
 
28
  "o_proj",
29
+ "v_proj",
30
+ "up_proj",
31
  "k_proj",
32
+ "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
35
  "use_dora": false,
36
  "use_rslora": false
37
  }
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32da8fce78ed226e77f431e33fe7e0f42f4f018a69f74c5398b0e2ccf1a881cb
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff94a08cfbfadc81987308cc7d47af94ac8ce7e11fb190f1f75ef59c20e3b15
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1c53b527b35aefa1821c75ad0db088c15c37748c02fe81987a79a2f577ba98e
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3d349cf5e65bb928953b78c47d5759352643d122fa1d227da5e8e6650f25ad
3
  size 323292202
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f50a692634404f2eebb2eab9f456865957578d752987bc52d843ac2a774366
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a37464a7417d63637852b1fff3ec37914c73ef86362dc59ab1c9aae5b091fe4e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7249c5158b8459ddcca143f0f417ff8eb82437a3eefc6bd1ba816e8c2260b9b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9867549668874172,
6
  "eval_steps": 100,
7
- "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1712,6 +1712,70 @@
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1715
  }
1716
  ],
1717
  "logging_steps": 50,
@@ -1731,7 +1795,7 @@
1731
  "attributes": {}
1732
  }
1733
  },
1734
- "total_flos": 9.411055686411878e+16,
1735
  "train_batch_size": 1,
1736
  "trial_name": null,
1737
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.052980132450331,
6
  "eval_steps": 100,
7
+ "global_step": 12400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1712
  "learning_rate": 3.091478285899862e-05,
1713
  "loss": 0.3612,
1714
  "step": 12000
1715
+ },
1716
+ {
1717
+ "epoch": 1.9950331125827816,
1718
+ "grad_norm": 1.2669893503189087,
1719
+ "learning_rate": 3.0470536893814385e-05,
1720
+ "loss": 0.3633,
1721
+ "step": 12050
1722
+ },
1723
+ {
1724
+ "epoch": 2.0,
1725
+ "eval_loss": 0.38875386118888855,
1726
+ "eval_runtime": 1884.7691,
1727
+ "eval_samples_per_second": 3.205,
1728
+ "eval_steps_per_second": 3.205,
1729
+ "step": 12080
1730
+ },
1731
+ {
1732
+ "epoch": 2.0033112582781456,
1733
+ "grad_norm": 1.1019831895828247,
1734
+ "learning_rate": 3.0028102791373535e-05,
1735
+ "loss": 0.3221,
1736
+ "step": 12100
1737
+ },
1738
+ {
1739
+ "epoch": 2.01158940397351,
1740
+ "grad_norm": 1.095613956451416,
1741
+ "learning_rate": 2.9587521598880573e-05,
1742
+ "loss": 0.281,
1743
+ "step": 12150
1744
+ },
1745
+ {
1746
+ "epoch": 2.019867549668874,
1747
+ "grad_norm": 1.1338448524475098,
1748
+ "learning_rate": 2.914883419163475e-05,
1749
+ "loss": 0.2824,
1750
+ "step": 12200
1751
+ },
1752
+ {
1753
+ "epoch": 2.0281456953642385,
1754
+ "grad_norm": 1.1247187852859497,
1755
+ "learning_rate": 2.871208126923771e-05,
1756
+ "loss": 0.2986,
1757
+ "step": 12250
1758
+ },
1759
+ {
1760
+ "epoch": 2.0364238410596025,
1761
+ "grad_norm": 1.0773439407348633,
1762
+ "learning_rate": 2.827730335181765e-05,
1763
+ "loss": 0.2852,
1764
+ "step": 12300
1765
+ },
1766
+ {
1767
+ "epoch": 2.044701986754967,
1768
+ "grad_norm": 1.343947410583496,
1769
+ "learning_rate": 2.7844540776269924e-05,
1770
+ "loss": 0.2905,
1771
+ "step": 12350
1772
+ },
1773
+ {
1774
+ "epoch": 2.052980132450331,
1775
+ "grad_norm": 0.9737703800201416,
1776
+ "learning_rate": 2.7413833692514844e-05,
1777
+ "loss": 0.2791,
1778
+ "step": 12400
1779
  }
1780
  ],
1781
  "logging_steps": 50,
 
1795
  "attributes": {}
1796
  }
1797
  },
1798
+ "total_flos": 9.725295003697152e+16,
1799
  "train_batch_size": 1,
1800
  "trial_name": null,
1801
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a8006cb98bf44b0d7f4a8b1fd3538cf90707ba1f704653af70e738a604df06b
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d239bd5c15d3a2ea118383dc904ef4e598d66c2b3b3c72f570384f15cfc1fc71
3
  size 5752