Commit
·
79fef62
1
Parent(s):
ffa4692
Training in progress, step 6900
Browse files
- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- {checkpoint-6500 → checkpoint-6800/adapter_model}/README.md +0 -0
- {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json +0 -0
- {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-6500/adapter_model → checkpoint-6900}/README.md +0 -0
- {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json +4 -4
- {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin +1 -1
- {checkpoint-6500 → checkpoint-6900}/optimizer.pt +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_0.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_1.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_10.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_11.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_12.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_13.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_2.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_3.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_4.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_5.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_6.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_7.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_8.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_9.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/scheduler.pt +1 -1
- {checkpoint-6500 → checkpoint-6900}/trainer_state.json +99 -3
- {checkpoint-6500 → checkpoint-6900}/training_args.bin +1 -1
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -14,13 +14,13 @@
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
-
"up_proj",
|
| 18 |
-
"gate_proj",
|
| 19 |
"down_proj",
|
|
|
|
| 20 |
"q_proj",
|
| 21 |
"k_proj",
|
| 22 |
-
"o_proj",
|
| 23 |
-
"v_proj"
|
|
|
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
|
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
|
|
|
|
|
|
| 17 |
"down_proj",
|
| 18 |
+
"up_proj",
|
| 19 |
"q_proj",
|
| 20 |
"k_proj",
|
| 21 |
+
"gate_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"o_proj"
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
|
| 3 |
size 500897101
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/README.md
RENAMED
|
File without changes
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json
RENAMED
|
File without changes
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
|
| 3 |
size 500897101
|
{checkpoint-6500/adapter_model → checkpoint-6900}/README.md
RENAMED
|
File without changes
|
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json
RENAMED
|
@@ -14,13 +14,13 @@
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
-
"up_proj",
|
| 18 |
-
"gate_proj",
|
| 19 |
"down_proj",
|
|
|
|
| 20 |
"q_proj",
|
| 21 |
"k_proj",
|
| 22 |
-
"o_proj",
|
| 23 |
-
"v_proj"
|
|
|
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
|
|
|
| 14 |
"r": 32,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
|
|
|
|
|
|
| 17 |
"down_proj",
|
| 18 |
+
"up_proj",
|
| 19 |
"q_proj",
|
| 20 |
"k_proj",
|
| 21 |
+
"gate_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"o_proj"
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500897101
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
|
| 3 |
size 500897101
|
{checkpoint-6500 → checkpoint-6900}/optimizer.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1001752701
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e51694cb0a1cd9603ece0232dabcd7a370a6f8e378f0b01213ceaaf413c4d0c
|
| 3 |
size 1001752701
|
{checkpoint-6500 → checkpoint-6900}/rng_state_0.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:361a0cbcd4b75d795f0109f290c642aca880cc06772f00227ba1e0217fc939ef
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_1.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f5123ae610aa26457a3fa34fd70ab7e4ab44cb21dc09fd095aad55a2d15294a
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_10.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a213c64ebe2689a3f02bbe5b0ee2c66feb57c0f5f4f8621c0ec2f63c6c590892
|
| 3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_11.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:531d6f0912536de9c8ce68b83c17fc005458796d7921c1fe083548fda04d57a7
|
| 3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_12.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dad9e94de76a75ad267b7650de4df602944013ed9b553b148fa94de6ea86bb52
|
| 3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_13.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27789
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f9e0a77144d5a662958f707fc8b494554054fd279f5a3dc74159dc97e6d1e78
|
| 3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_2.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5b931b3c81ed25f1fcd0971b6da08a3869bfd28a0489528ed3c96c3619fa557
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_3.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e121c889141260915892ddfe435284a35a9f246ef67720bdc33081a846ab328
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_4.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f912038cfe5be27104676eb3640c3beb52c1c4c80bafea1732f0443a67ad4b4
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_5.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e7e05a90589de96c2e2748fa928890eb3ae0f6585d2b9768316f612aa8cf908
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_6.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfaec3c28cf52d29cdacae2acf43363d9aea5a881877b06a0055a14c1b7aed6a
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_7.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c35c98f247077b0ff5a006aeba6561c7e6bf2e9fd8969c9bc35d64aaf01cf14a
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_8.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86a908ac53361e9d5c34680f8e1789e69824f75a3e0b100140c8133e52f8fbed
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_9.pth
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27772
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5597a151c26d0f9f0ba329a3cf67af7f95b7fed3d58ea4461f586948ea11f890
|
| 3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/scheduler.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed903a2193f395f38212602a53ab2a5c251266f0ae7d7dc0557e3b565ed8e240
|
| 3 |
size 627
|
{checkpoint-6500 → checkpoint-6900}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 2.
|
| 5 |
-
"global_step": 6500,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1590,11 +1590,107 @@
|
|
| 1590 |
"learning_rate": 1.2382654384049475e-05,
|
| 1591 |
"loss": 0.7714,
|
| 1592 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1593 |
}
|
| 1594 |
],
|
| 1595 |
"max_steps": 7737,
|
| 1596 |
"num_train_epochs": 3,
|
| 1597 |
-
"total_flos": 2.
|
| 1598 |
"trial_name": null,
|
| 1599 |
"trial_params": null
|
| 1600 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.6754556029468786,
|
| 5 |
+
"global_step": 6900,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1590 |
"learning_rate": 1.2382654384049475e-05,
|
| 1591 |
"loss": 0.7714,
|
| 1592 |
"step": 6500
|
| 1593 |
+
},
|
| 1594 |
+
{
|
| 1595 |
+
"epoch": 2.53,
|
| 1596 |
+
"learning_rate": 1.1897272010289884e-05,
|
| 1597 |
+
"loss": 0.7701,
|
| 1598 |
+
"step": 6525
|
| 1599 |
+
},
|
| 1600 |
+
{
|
| 1601 |
+
"epoch": 2.54,
|
| 1602 |
+
"learning_rate": 1.1420991777454315e-05,
|
| 1603 |
+
"loss": 0.7628,
|
| 1604 |
+
"step": 6550
|
| 1605 |
+
},
|
| 1606 |
+
{
|
| 1607 |
+
"epoch": 2.55,
|
| 1608 |
+
"learning_rate": 1.0953862891394795e-05,
|
| 1609 |
+
"loss": 0.7661,
|
| 1610 |
+
"step": 6575
|
| 1611 |
+
},
|
| 1612 |
+
{
|
| 1613 |
+
"epoch": 2.56,
|
| 1614 |
+
"learning_rate": 1.0495933612511976e-05,
|
| 1615 |
+
"loss": 0.7729,
|
| 1616 |
+
"step": 6600
|
| 1617 |
+
},
|
| 1618 |
+
{
|
| 1619 |
+
"epoch": 2.57,
|
| 1620 |
+
"learning_rate": 1.0047251250769175e-05,
|
| 1621 |
+
"loss": 0.772,
|
| 1622 |
+
"step": 6625
|
| 1623 |
+
},
|
| 1624 |
+
{
|
| 1625 |
+
"epoch": 2.58,
|
| 1626 |
+
"learning_rate": 9.60786216080466e-06,
|
| 1627 |
+
"loss": 0.7702,
|
| 1628 |
+
"step": 6650
|
| 1629 |
+
},
|
| 1630 |
+
{
|
| 1631 |
+
"epoch": 2.59,
|
| 1632 |
+
"learning_rate": 9.177811737142627e-06,
|
| 1633 |
+
"loss": 0.7711,
|
| 1634 |
+
"step": 6675
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"epoch": 2.6,
|
| 1638 |
+
"learning_rate": 8.757144409503359e-06,
|
| 1639 |
+
"loss": 0.765,
|
| 1640 |
+
"step": 6700
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"epoch": 2.61,
|
| 1644 |
+
"learning_rate": 8.34590363821306e-06,
|
| 1645 |
+
"loss": 0.7713,
|
| 1646 |
+
"step": 6725
|
| 1647 |
+
},
|
| 1648 |
+
{
|
| 1649 |
+
"epoch": 2.62,
|
| 1650 |
+
"learning_rate": 7.944131909713859e-06,
|
| 1651 |
+
"loss": 0.7631,
|
| 1652 |
+
"step": 6750
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"epoch": 2.63,
|
| 1656 |
+
"learning_rate": 7.551870732174416e-06,
|
| 1657 |
+
"loss": 0.767,
|
| 1658 |
+
"step": 6775
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"epoch": 2.64,
|
| 1662 |
+
"learning_rate": 7.169160631201566e-06,
|
| 1663 |
+
"loss": 0.7692,
|
| 1664 |
+
"step": 6800
|
| 1665 |
+
},
|
| 1666 |
+
{
|
| 1667 |
+
"epoch": 2.65,
|
| 1668 |
+
"learning_rate": 6.796041145653553e-06,
|
| 1669 |
+
"loss": 0.7677,
|
| 1670 |
+
"step": 6825
|
| 1671 |
+
},
|
| 1672 |
+
{
|
| 1673 |
+
"epoch": 2.66,
|
| 1674 |
+
"learning_rate": 6.432550823555128e-06,
|
| 1675 |
+
"loss": 0.7706,
|
| 1676 |
+
"step": 6850
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 2.67,
|
| 1680 |
+
"learning_rate": 6.078727218115043e-06,
|
| 1681 |
+
"loss": 0.7678,
|
| 1682 |
+
"step": 6875
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 2.68,
|
| 1686 |
+
"learning_rate": 5.734606883846338e-06,
|
| 1687 |
+
"loss": 0.7717,
|
| 1688 |
+
"step": 6900
|
| 1689 |
}
|
| 1690 |
],
|
| 1691 |
"max_steps": 7737,
|
| 1692 |
"num_train_epochs": 3,
|
| 1693 |
+
"total_flos": 2.9715299616168083e+19,
|
| 1694 |
"trial_name": null,
|
| 1695 |
"trial_params": null
|
| 1696 |
}
|
{checkpoint-6500 → checkpoint-6900}/training_args.bin
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4027
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
|
| 3 |
size 4027
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4027
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
|
| 3 |
size 4027
|