ZeroUniqueness committed on
Commit
e6f4e44
·
1 Parent(s): 2883b3a

Training in progress, step 7200

Browse files
Files changed (25) hide show
  1. adapter_model.bin +1 -1
  2. {checkpoint-6800 → checkpoint-7100/adapter_model}/README.md +0 -0
  3. {checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_config.json +4 -4
  4. {checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_model.bin +1 -1
  5. {checkpoint-6800/adapter_model → checkpoint-7200}/README.md +0 -0
  6. {checkpoint-6800/adapter_model → checkpoint-7200}/adapter_config.json +4 -4
  7. {checkpoint-6800/adapter_model → checkpoint-7200}/adapter_model.bin +1 -1
  8. {checkpoint-6800 → checkpoint-7200}/optimizer.pt +1 -1
  9. {checkpoint-6800 → checkpoint-7200}/rng_state_0.pth +1 -1
  10. {checkpoint-6800 → checkpoint-7200}/rng_state_1.pth +1 -1
  11. {checkpoint-6800 → checkpoint-7200}/rng_state_10.pth +1 -1
  12. {checkpoint-6800 → checkpoint-7200}/rng_state_11.pth +1 -1
  13. {checkpoint-6800 → checkpoint-7200}/rng_state_12.pth +1 -1
  14. {checkpoint-6800 → checkpoint-7200}/rng_state_13.pth +1 -1
  15. {checkpoint-6800 → checkpoint-7200}/rng_state_2.pth +1 -1
  16. {checkpoint-6800 → checkpoint-7200}/rng_state_3.pth +1 -1
  17. {checkpoint-6800 → checkpoint-7200}/rng_state_4.pth +1 -1
  18. {checkpoint-6800 → checkpoint-7200}/rng_state_5.pth +1 -1
  19. {checkpoint-6800 → checkpoint-7200}/rng_state_6.pth +1 -1
  20. {checkpoint-6800 → checkpoint-7200}/rng_state_7.pth +1 -1
  21. {checkpoint-6800 → checkpoint-7200}/rng_state_8.pth +1 -1
  22. {checkpoint-6800 → checkpoint-7200}/rng_state_9.pth +1 -1
  23. {checkpoint-6800 → checkpoint-7200}/scheduler.pt +1 -1
  24. {checkpoint-6800 → checkpoint-7200}/trainer_state.json +107 -3
  25. {checkpoint-6800 → checkpoint-7200}/training_args.bin +1 -1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7994cd2bdb16f74437b3f74bb9b30d22b607685dff2fbbddd6503caf3ecfc9c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ed435f77b8aaa468d0a026e8247b6e75dcc7152deb57502bce336e2ea4128e
3
  size 500897101
{checkpoint-6800 → checkpoint-7100/adapter_model}/README.md RENAMED
File without changes
{checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "up_proj",
18
- "gate_proj",
19
  "down_proj",
 
20
  "q_proj",
21
  "k_proj",
22
- "o_proj",
23
- "v_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
17
  "down_proj",
18
+ "up_proj",
19
  "q_proj",
20
  "k_proj",
21
+ "gate_proj",
22
+ "v_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7994cd2bdb16f74437b3f74bb9b30d22b607685dff2fbbddd6503caf3ecfc9c
3
  size 500897101
{checkpoint-6800/adapter_model → checkpoint-7200}/README.md RENAMED
File without changes
{checkpoint-6800/adapter_model → checkpoint-7200}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "up_proj",
18
- "gate_proj",
19
  "down_proj",
 
20
  "q_proj",
21
  "k_proj",
22
- "o_proj",
23
- "v_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
17
  "down_proj",
18
+ "up_proj",
19
  "q_proj",
20
  "k_proj",
21
+ "gate_proj",
22
+ "v_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-6800/adapter_model → checkpoint-7200}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ed435f77b8aaa468d0a026e8247b6e75dcc7152deb57502bce336e2ea4128e
3
  size 500897101
{checkpoint-6800 → checkpoint-7200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a8797c2a2a21d3c99399832b3c5e9972ada7dec6e83cff5de4273cfa1c8c9f7
3
  size 1001752701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58080885ae3c7e810fac9015f40f367426ce410c7f99ba7b48feda9529653b88
3
  size 1001752701
{checkpoint-6800 → checkpoint-7200}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76a69b101b534038a0d1cd30c8b1e9841f687d0b3b07df6d02b3916e19b719c2
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78a8c7990f0b5fabbe6277b9b978a79ef9d902e28f476c9e2d7ad38be8f683c
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e8706d00c08c82d30b24671bce72e1fd88ccda2d435bfc8570ef4280b40d47
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7b8c7955910d1e491c12afee87720b150c2f84104325ab9c838c1295ee23834
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_10.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f82da6b92d2dd42eda022db14fa263f4d03e28910c03bf4e97c967f1d893d8f
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b248c3f8c2f9ee32893e76f2912253473328a82703a9e9cf774a7ddf60d42191
3
  size 27789
{checkpoint-6800 → checkpoint-7200}/rng_state_11.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864477484049b3efdc98311c8185fa709645f956dfcbc5f22f55344bde84d440
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b4aace0601254c95160262eebbc86921bdf994e5b06ee7d3f592a180f3f4da
3
  size 27789
{checkpoint-6800 → checkpoint-7200}/rng_state_12.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b26c1496ef4023f2612561635f505e587abbdb9e2a946f7d90f08aaa3337aa4
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbfecac90c68b3c7960f23f0a5c624343b34de925fd36e8f1553794649032b92
3
  size 27789
{checkpoint-6800 → checkpoint-7200}/rng_state_13.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c96078789fb495fd632c506d1671752f888d28d828331ac9ab1506089b50ed
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3aaab3904ce6b40c88c9ba5f75918dcc2286b14b28ff6f552b1e33426b307a4
3
  size 27789
{checkpoint-6800 → checkpoint-7200}/rng_state_2.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61e915fcf4dd8f755df46cb9e5050aaae8b843699fa9edd019950564b2fda05c
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cea57a52a03e3e2ba53900bfa4f24480bd064ae5ee54082744c8d66479e3392
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_3.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e81ee1af73dd375865e86e8bf7e0efc15b3d4e65d58ca9f4d3ed4d33561879b
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183c5a8964dcebb4403f961f817d27e57f34a8aca9d588c50a976a55c7fd2dcc
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_4.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9150b049fc2e88708149ace562a8a4c9fef4ae5ea0bb2b8a584401d2abf84b2
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72dde9fbb9f6a3d30beec6e7a6a331f1c07cc41bc3e421c95d5fc51337163858
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_5.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9632ffce62364279230205690bdfad30fc16e17ef6a0f7efacfbeb73e2b1496
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:240e0b564b5f72f61c5e9bf130c09d6b7a884041c6817f283c0db8dce9514c6d
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_6.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0510fe79de24ae1d1cce464b86e9ee2bc9d2fa77099fb7dc30a7866117bd6e7
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e782243547e75d21ba55dd4adfc01fec8df179ac5853e510085d873d508172
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_7.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c084b8f84126bddadb7fcd64d22191438d565af4562b1fa9ef192f15ed39f43
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a862e45bf9cf553270045d741a8187da4668bf889c171749a617824d14d63917
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_8.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a3c330abb87810692a9b735e5edfdf7a74760562ee2e69753f40e14ef0d2404
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e864e9ec63d970372488da3e89af0a1ea00e3da41e08446b93c1ddc621af475d
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/rng_state_9.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:002d98d8a4fb73f32df6d2c88a43fb21668bf0821b126886c8586170e99c43d9
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8cdcdaf9565ca7ffc01e41e6463d0c5d0aaca2ff165ca019aaa7bb751b870e
3
  size 27772
{checkpoint-6800 → checkpoint-7200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89396b89609adca20f26c9861c9dcc008dc9de0fbc3e7816552f07bbb0b807a7
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d125f3dfa4d0989c607da131ae73674dff5736961f5c5c505915b427cba21012
3
  size 627
{checkpoint-6800 → checkpoint-7200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.6366808840635905,
5
- "global_step": 6800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1662,11 +1662,115 @@
1662
  "learning_rate": 7.169160631201566e-06,
1663
  "loss": 0.7692,
1664
  "step": 6800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1665
  }
1666
  ],
1667
  "max_steps": 7737,
1668
  "num_train_epochs": 3,
1669
- "total_flos": 2.928513074136613e+19,
1670
  "trial_name": null,
1671
  "trial_params": null
1672
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.791779759596743,
5
+ "global_step": 7200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1662
  "learning_rate": 7.169160631201566e-06,
1663
  "loss": 0.7692,
1664
  "step": 6800
1665
+ },
1666
+ {
1667
+ "epoch": 2.65,
1668
+ "learning_rate": 6.796041145653553e-06,
1669
+ "loss": 0.7677,
1670
+ "step": 6825
1671
+ },
1672
+ {
1673
+ "epoch": 2.66,
1674
+ "learning_rate": 6.432550823555128e-06,
1675
+ "loss": 0.7706,
1676
+ "step": 6850
1677
+ },
1678
+ {
1679
+ "epoch": 2.67,
1680
+ "learning_rate": 6.078727218115043e-06,
1681
+ "loss": 0.7678,
1682
+ "step": 6875
1683
+ },
1684
+ {
1685
+ "epoch": 2.68,
1686
+ "learning_rate": 5.734606883846338e-06,
1687
+ "loss": 0.7717,
1688
+ "step": 6900
1689
+ },
1690
+ {
1691
+ "epoch": 2.69,
1692
+ "learning_rate": 5.40022537278978e-06,
1693
+ "loss": 0.7701,
1694
+ "step": 6925
1695
+ },
1696
+ {
1697
+ "epoch": 2.69,
1698
+ "learning_rate": 5.07561723084089e-06,
1699
+ "loss": 0.7694,
1700
+ "step": 6950
1701
+ },
1702
+ {
1703
+ "epoch": 2.7,
1704
+ "learning_rate": 4.7608159941809e-06,
1705
+ "loss": 0.7659,
1706
+ "step": 6975
1707
+ },
1708
+ {
1709
+ "epoch": 2.71,
1710
+ "learning_rate": 4.455854185812047e-06,
1711
+ "loss": 0.7639,
1712
+ "step": 7000
1713
+ },
1714
+ {
1715
+ "epoch": 2.71,
1716
+ "eval_loss": 0.8055068850517273,
1717
+ "eval_runtime": 58.7443,
1718
+ "eval_samples_per_second": 12.427,
1719
+ "eval_steps_per_second": 0.902,
1720
+ "step": 7000
1721
+ },
1722
+ {
1723
+ "epoch": 2.72,
1724
+ "learning_rate": 4.160763312197513e-06,
1725
+ "loss": 0.7724,
1726
+ "step": 7025
1727
+ },
1728
+ {
1729
+ "epoch": 2.73,
1730
+ "learning_rate": 3.875573860006421e-06,
1731
+ "loss": 0.7696,
1732
+ "step": 7050
1733
+ },
1734
+ {
1735
+ "epoch": 2.74,
1736
+ "learning_rate": 3.6003152929641624e-06,
1737
+ "loss": 0.7625,
1738
+ "step": 7075
1739
+ },
1740
+ {
1741
+ "epoch": 2.75,
1742
+ "learning_rate": 3.335016048808437e-06,
1743
+ "loss": 0.7733,
1744
+ "step": 7100
1745
+ },
1746
+ {
1747
+ "epoch": 2.76,
1748
+ "learning_rate": 3.0797035363512193e-06,
1749
+ "loss": 0.7685,
1750
+ "step": 7125
1751
+ },
1752
+ {
1753
+ "epoch": 2.77,
1754
+ "learning_rate": 2.834404132647128e-06,
1755
+ "loss": 0.769,
1756
+ "step": 7150
1757
+ },
1758
+ {
1759
+ "epoch": 2.78,
1760
+ "learning_rate": 2.5991431802683262e-06,
1761
+ "loss": 0.7647,
1762
+ "step": 7175
1763
+ },
1764
+ {
1765
+ "epoch": 2.79,
1766
+ "learning_rate": 2.3739449846862826e-06,
1767
+ "loss": 0.7634,
1768
+ "step": 7200
1769
  }
1770
  ],
1771
  "max_steps": 7737,
1772
  "num_train_epochs": 3,
1773
+ "total_flos": 3.100665541573791e+19,
1774
  "trial_name": null,
1775
  "trial_params": null
1776
  }
{checkpoint-6800 → checkpoint-7200}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85009bca9623c846e630c294adb80ecbcd9e720da8da9f9ee5311b562908b91
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
3
  size 4027