Commit ·
e6f4e44
1
Parent(s): 2883b3a
Training in progress, step 7200
Browse files
- adapter_model.bin +1 -1
- {checkpoint-6800 → checkpoint-7100/adapter_model}/README.md +0 -0
- {checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_config.json +4 -4
- {checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-6800/adapter_model → checkpoint-7200}/README.md +0 -0
- {checkpoint-6800/adapter_model → checkpoint-7200}/adapter_config.json +4 -4
- {checkpoint-6800/adapter_model → checkpoint-7200}/adapter_model.bin +1 -1
- {checkpoint-6800 → checkpoint-7200}/optimizer.pt +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_0.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_1.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_10.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_11.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_12.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_13.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_2.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_3.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_4.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_5.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_6.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_7.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_8.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/rng_state_9.pth +1 -1
- {checkpoint-6800 → checkpoint-7200}/scheduler.pt +1 -1
- {checkpoint-6800 → checkpoint-7200}/trainer_state.json +107 -3
- {checkpoint-6800 → checkpoint-7200}/training_args.bin +1 -1
adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ed435f77b8aaa468d0a026e8247b6e75dcc7152deb57502bce336e2ea4128e
|
| 3 |
size 500897101
|
{checkpoint-6800 → checkpoint-7100/adapter_model}/README.md
RENAMED
|
File without changes
|
{checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_config.json
RENAMED
|
@@ -14,13 +14,13 @@
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
-
"up_proj",
|
| 18 |
-
"gate_proj",
|
| 19 |
"down_proj",
|
|
|
|
| 20 |
"q_proj",
|
| 21 |
"k_proj",
|
| 22 |
-
"
|
| 23 |
-
"v_proj"
|
|
|
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
|
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
|
|
|
|
|
|
| 17 |
"down_proj",
|
| 18 |
+
"up_proj",
|
| 19 |
"q_proj",
|
| 20 |
"k_proj",
|
| 21 |
+
"gate_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"o_proj"
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
{checkpoint-6800 → checkpoint-7100/adapter_model}/adapter_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7994cd2bdb16f74437b3f74bb9b30d22b607685dff2fbbddd6503caf3ecfc9c
|
| 3 |
size 500897101
|
{checkpoint-6800/adapter_model → checkpoint-7200}/README.md
RENAMED
|
File without changes
|
{checkpoint-6800/adapter_model → checkpoint-7200}/adapter_config.json
RENAMED
|
@@ -14,13 +14,13 @@
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
-
"up_proj",
|
| 18 |
-
"gate_proj",
|
| 19 |
"down_proj",
|
|
|
|
| 20 |
"q_proj",
|
| 21 |
"k_proj",
|
| 22 |
-
"
|
| 23 |
-
"v_proj"
|
|
|
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
|
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
|
|
|
|
|
|
| 17 |
"down_proj",
|
| 18 |
+
"up_proj",
|
| 19 |
"q_proj",
|
| 20 |
"k_proj",
|
| 21 |
+
"gate_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"o_proj"
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
{checkpoint-6800/adapter_model → checkpoint-7200}/adapter_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ed435f77b8aaa468d0a026e8247b6e75dcc7152deb57502bce336e2ea4128e
|
| 3 |
size 500897101
|
{checkpoint-6800 → checkpoint-7200}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1001752701
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58080885ae3c7e810fac9015f40f367426ce410c7f99ba7b48feda9529653b88
|
| 3 |
size 1001752701
|
{checkpoint-6800 → checkpoint-7200}/rng_state_0.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e78a8c7990f0b5fabbe6277b9b978a79ef9d902e28f476c9e2d7ad38be8f683c
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_1.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7b8c7955910d1e491c12afee87720b150c2f84104325ab9c838c1295ee23834
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_10.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b248c3f8c2f9ee32893e76f2912253473328a82703a9e9cf774a7ddf60d42191
|
| 3 |
size 27789
|
{checkpoint-6800 → checkpoint-7200}/rng_state_11.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80b4aace0601254c95160262eebbc86921bdf994e5b06ee7d3f592a180f3f4da
|
| 3 |
size 27789
|
{checkpoint-6800 → checkpoint-7200}/rng_state_12.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbfecac90c68b3c7960f23f0a5c624343b34de925fd36e8f1553794649032b92
|
| 3 |
size 27789
|
{checkpoint-6800 → checkpoint-7200}/rng_state_13.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3aaab3904ce6b40c88c9ba5f75918dcc2286b14b28ff6f552b1e33426b307a4
|
| 3 |
size 27789
|
{checkpoint-6800 → checkpoint-7200}/rng_state_2.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cea57a52a03e3e2ba53900bfa4f24480bd064ae5ee54082744c8d66479e3392
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_3.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:183c5a8964dcebb4403f961f817d27e57f34a8aca9d588c50a976a55c7fd2dcc
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_4.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72dde9fbb9f6a3d30beec6e7a6a331f1c07cc41bc3e421c95d5fc51337163858
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_5.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:240e0b564b5f72f61c5e9bf130c09d6b7a884041c6817f283c0db8dce9514c6d
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_6.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3e782243547e75d21ba55dd4adfc01fec8df179ac5853e510085d873d508172
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_7.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a862e45bf9cf553270045d741a8187da4668bf889c171749a617824d14d63917
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_8.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e864e9ec63d970372488da3e89af0a1ea00e3da41e08446b93c1ddc621af475d
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/rng_state_9.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d8cdcdaf9565ca7ffc01e41e6463d0c5d0aaca2ff165ca019aaa7bb751b870e
|
| 3 |
size 27772
|
{checkpoint-6800 → checkpoint-7200}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d125f3dfa4d0989c607da131ae73674dff5736961f5c5c505915b427cba21012
|
| 3 |
size 627
|
{checkpoint-6800 → checkpoint-7200}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 2.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1662,11 +1662,115 @@
|
|
| 1662 |
"learning_rate": 7.169160631201566e-06,
|
| 1663 |
"loss": 0.7692,
|
| 1664 |
"step": 6800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1665 |
}
|
| 1666 |
],
|
| 1667 |
"max_steps": 7737,
|
| 1668 |
"num_train_epochs": 3,
|
| 1669 |
-
"total_flos":
|
| 1670 |
"trial_name": null,
|
| 1671 |
"trial_params": null
|
| 1672 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.791779759596743,
|
| 5 |
+
"global_step": 7200,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1662 |
"learning_rate": 7.169160631201566e-06,
|
| 1663 |
"loss": 0.7692,
|
| 1664 |
"step": 6800
|
| 1665 |
+
},
|
| 1666 |
+
{
|
| 1667 |
+
"epoch": 2.65,
|
| 1668 |
+
"learning_rate": 6.796041145653553e-06,
|
| 1669 |
+
"loss": 0.7677,
|
| 1670 |
+
"step": 6825
|
| 1671 |
+
},
|
| 1672 |
+
{
|
| 1673 |
+
"epoch": 2.66,
|
| 1674 |
+
"learning_rate": 6.432550823555128e-06,
|
| 1675 |
+
"loss": 0.7706,
|
| 1676 |
+
"step": 6850
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 2.67,
|
| 1680 |
+
"learning_rate": 6.078727218115043e-06,
|
| 1681 |
+
"loss": 0.7678,
|
| 1682 |
+
"step": 6875
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 2.68,
|
| 1686 |
+
"learning_rate": 5.734606883846338e-06,
|
| 1687 |
+
"loss": 0.7717,
|
| 1688 |
+
"step": 6900
|
| 1689 |
+
},
|
| 1690 |
+
{
|
| 1691 |
+
"epoch": 2.69,
|
| 1692 |
+
"learning_rate": 5.40022537278978e-06,
|
| 1693 |
+
"loss": 0.7701,
|
| 1694 |
+
"step": 6925
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 2.69,
|
| 1698 |
+
"learning_rate": 5.07561723084089e-06,
|
| 1699 |
+
"loss": 0.7694,
|
| 1700 |
+
"step": 6950
|
| 1701 |
+
},
|
| 1702 |
+
{
|
| 1703 |
+
"epoch": 2.7,
|
| 1704 |
+
"learning_rate": 4.7608159941809e-06,
|
| 1705 |
+
"loss": 0.7659,
|
| 1706 |
+
"step": 6975
|
| 1707 |
+
},
|
| 1708 |
+
{
|
| 1709 |
+
"epoch": 2.71,
|
| 1710 |
+
"learning_rate": 4.455854185812047e-06,
|
| 1711 |
+
"loss": 0.7639,
|
| 1712 |
+
"step": 7000
|
| 1713 |
+
},
|
| 1714 |
+
{
|
| 1715 |
+
"epoch": 2.71,
|
| 1716 |
+
"eval_loss": 0.8055068850517273,
|
| 1717 |
+
"eval_runtime": 58.7443,
|
| 1718 |
+
"eval_samples_per_second": 12.427,
|
| 1719 |
+
"eval_steps_per_second": 0.902,
|
| 1720 |
+
"step": 7000
|
| 1721 |
+
},
|
| 1722 |
+
{
|
| 1723 |
+
"epoch": 2.72,
|
| 1724 |
+
"learning_rate": 4.160763312197513e-06,
|
| 1725 |
+
"loss": 0.7724,
|
| 1726 |
+
"step": 7025
|
| 1727 |
+
},
|
| 1728 |
+
{
|
| 1729 |
+
"epoch": 2.73,
|
| 1730 |
+
"learning_rate": 3.875573860006421e-06,
|
| 1731 |
+
"loss": 0.7696,
|
| 1732 |
+
"step": 7050
|
| 1733 |
+
},
|
| 1734 |
+
{
|
| 1735 |
+
"epoch": 2.74,
|
| 1736 |
+
"learning_rate": 3.6003152929641624e-06,
|
| 1737 |
+
"loss": 0.7625,
|
| 1738 |
+
"step": 7075
|
| 1739 |
+
},
|
| 1740 |
+
{
|
| 1741 |
+
"epoch": 2.75,
|
| 1742 |
+
"learning_rate": 3.335016048808437e-06,
|
| 1743 |
+
"loss": 0.7733,
|
| 1744 |
+
"step": 7100
|
| 1745 |
+
},
|
| 1746 |
+
{
|
| 1747 |
+
"epoch": 2.76,
|
| 1748 |
+
"learning_rate": 3.0797035363512193e-06,
|
| 1749 |
+
"loss": 0.7685,
|
| 1750 |
+
"step": 7125
|
| 1751 |
+
},
|
| 1752 |
+
{
|
| 1753 |
+
"epoch": 2.77,
|
| 1754 |
+
"learning_rate": 2.834404132647128e-06,
|
| 1755 |
+
"loss": 0.769,
|
| 1756 |
+
"step": 7150
|
| 1757 |
+
},
|
| 1758 |
+
{
|
| 1759 |
+
"epoch": 2.78,
|
| 1760 |
+
"learning_rate": 2.5991431802683262e-06,
|
| 1761 |
+
"loss": 0.7647,
|
| 1762 |
+
"step": 7175
|
| 1763 |
+
},
|
| 1764 |
+
{
|
| 1765 |
+
"epoch": 2.79,
|
| 1766 |
+
"learning_rate": 2.3739449846862826e-06,
|
| 1767 |
+
"loss": 0.7634,
|
| 1768 |
+
"step": 7200
|
| 1769 |
}
|
| 1770 |
],
|
| 1771 |
"max_steps": 7737,
|
| 1772 |
"num_train_epochs": 3,
|
| 1773 |
+
"total_flos": 3.100665541573791e+19,
|
| 1774 |
"trial_name": null,
|
| 1775 |
"trial_params": null
|
| 1776 |
}
|
{checkpoint-6800 → checkpoint-7200}/training_args.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4027
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
|
| 3 |
size 4027
|