Training in progress, epoch 9, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2695611744
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95e876461f6941ed752cf45d9ac9302d2fcd14f16fa213ea0d206586aa9f8ad0
|
| 3 |
size 2695611744
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 26261260
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efc1f9e0d1f0ba25073b30251fc09e761cd12b900a86d5d2832f4e9ab7cf17ff
|
| 3 |
size 26261260
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15006
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac7ad6975b02cb2fe1ae9b24f6d70c26049c580d43be5a2feb4f3aa6fc1aa47
|
| 3 |
size 15006
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c42147d2487e975dcb8b015449194c61c9350b5c1b3a114ecd6e3942d3403969
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 10,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1631,6 +1631,174 @@
|
|
| 1631 |
"eval_samples_per_second": 22.012,
|
| 1632 |
"eval_steps_per_second": 5.503,
|
| 1633 |
"step": 1160
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1634 |
}
|
| 1635 |
],
|
| 1636 |
"logging_steps": 10,
|
|
@@ -1645,7 +1813,7 @@
|
|
| 1645 |
"should_evaluate": false,
|
| 1646 |
"should_log": false,
|
| 1647 |
"should_save": true,
|
| 1648 |
-
"should_training_stop":
|
| 1649 |
},
|
| 1650 |
"attributes": {}
|
| 1651 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.929233772571987,
|
| 5 |
"eval_steps": 10,
|
| 6 |
+
"global_step": 1280,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1631 |
"eval_samples_per_second": 22.012,
|
| 1632 |
"eval_steps_per_second": 5.503,
|
| 1633 |
"step": 1160
|
| 1634 |
+
},
|
| 1635 |
+
{
|
| 1636 |
+
"epoch": 9.070278184480234,
|
| 1637 |
+
"grad_norm": 194401.953125,
|
| 1638 |
+
"learning_rate": 8.59375e-06,
|
| 1639 |
+
"loss": 306.9392,
|
| 1640 |
+
"step": 1170
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"epoch": 9.070278184480234,
|
| 1644 |
+
"eval_runtime": 19.6327,
|
| 1645 |
+
"eval_samples_per_second": 22.004,
|
| 1646 |
+
"eval_steps_per_second": 5.501,
|
| 1647 |
+
"step": 1170
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"epoch": 9.14836505612494,
|
| 1651 |
+
"grad_norm": 134935.84375,
|
| 1652 |
+
"learning_rate": 7.8125e-06,
|
| 1653 |
+
"loss": 309.7358,
|
| 1654 |
+
"step": 1180
|
| 1655 |
+
},
|
| 1656 |
+
{
|
| 1657 |
+
"epoch": 9.14836505612494,
|
| 1658 |
+
"eval_runtime": 19.625,
|
| 1659 |
+
"eval_samples_per_second": 22.013,
|
| 1660 |
+
"eval_steps_per_second": 5.503,
|
| 1661 |
+
"step": 1180
|
| 1662 |
+
},
|
| 1663 |
+
{
|
| 1664 |
+
"epoch": 9.226451927769643,
|
| 1665 |
+
"grad_norm": 169914.25,
|
| 1666 |
+
"learning_rate": 7.031250000000001e-06,
|
| 1667 |
+
"loss": 316.8677,
|
| 1668 |
+
"step": 1190
|
| 1669 |
+
},
|
| 1670 |
+
{
|
| 1671 |
+
"epoch": 9.226451927769643,
|
| 1672 |
+
"eval_runtime": 19.6279,
|
| 1673 |
+
"eval_samples_per_second": 22.01,
|
| 1674 |
+
"eval_steps_per_second": 5.502,
|
| 1675 |
+
"step": 1190
|
| 1676 |
+
},
|
| 1677 |
+
{
|
| 1678 |
+
"epoch": 9.304538799414349,
|
| 1679 |
+
"grad_norm": 145255.390625,
|
| 1680 |
+
"learning_rate": 6.25e-06,
|
| 1681 |
+
"loss": 281.0021,
|
| 1682 |
+
"step": 1200
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 9.304538799414349,
|
| 1686 |
+
"eval_runtime": 19.6322,
|
| 1687 |
+
"eval_samples_per_second": 22.005,
|
| 1688 |
+
"eval_steps_per_second": 5.501,
|
| 1689 |
+
"step": 1200
|
| 1690 |
+
},
|
| 1691 |
+
{
|
| 1692 |
+
"epoch": 9.382625671059053,
|
| 1693 |
+
"grad_norm": 77945.0625,
|
| 1694 |
+
"learning_rate": 5.46875e-06,
|
| 1695 |
+
"loss": 225.1448,
|
| 1696 |
+
"step": 1210
|
| 1697 |
+
},
|
| 1698 |
+
{
|
| 1699 |
+
"epoch": 9.382625671059053,
|
| 1700 |
+
"eval_runtime": 19.6173,
|
| 1701 |
+
"eval_samples_per_second": 22.021,
|
| 1702 |
+
"eval_steps_per_second": 5.505,
|
| 1703 |
+
"step": 1210
|
| 1704 |
+
},
|
| 1705 |
+
{
|
| 1706 |
+
"epoch": 9.460712542703758,
|
| 1707 |
+
"grad_norm": 106036.7421875,
|
| 1708 |
+
"learning_rate": 4.6875000000000004e-06,
|
| 1709 |
+
"loss": 260.6584,
|
| 1710 |
+
"step": 1220
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"epoch": 9.460712542703758,
|
| 1714 |
+
"eval_runtime": 19.6147,
|
| 1715 |
+
"eval_samples_per_second": 22.024,
|
| 1716 |
+
"eval_steps_per_second": 5.506,
|
| 1717 |
+
"step": 1220
|
| 1718 |
+
},
|
| 1719 |
+
{
|
| 1720 |
+
"epoch": 9.538799414348462,
|
| 1721 |
+
"grad_norm": 103023.9296875,
|
| 1722 |
+
"learning_rate": 3.90625e-06,
|
| 1723 |
+
"loss": 246.4592,
|
| 1724 |
+
"step": 1230
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"epoch": 9.538799414348462,
|
| 1728 |
+
"eval_runtime": 19.6287,
|
| 1729 |
+
"eval_samples_per_second": 22.009,
|
| 1730 |
+
"eval_steps_per_second": 5.502,
|
| 1731 |
+
"step": 1230
|
| 1732 |
+
},
|
| 1733 |
+
{
|
| 1734 |
+
"epoch": 9.616886285993168,
|
| 1735 |
+
"grad_norm": 69943.5,
|
| 1736 |
+
"learning_rate": 3.125e-06,
|
| 1737 |
+
"loss": 254.6815,
|
| 1738 |
+
"step": 1240
|
| 1739 |
+
},
|
| 1740 |
+
{
|
| 1741 |
+
"epoch": 9.616886285993168,
|
| 1742 |
+
"eval_runtime": 19.6092,
|
| 1743 |
+
"eval_samples_per_second": 22.031,
|
| 1744 |
+
"eval_steps_per_second": 5.508,
|
| 1745 |
+
"step": 1240
|
| 1746 |
+
},
|
| 1747 |
+
{
|
| 1748 |
+
"epoch": 9.694973157637872,
|
| 1749 |
+
"grad_norm": 157203.640625,
|
| 1750 |
+
"learning_rate": 2.3437500000000002e-06,
|
| 1751 |
+
"loss": 247.872,
|
| 1752 |
+
"step": 1250
|
| 1753 |
+
},
|
| 1754 |
+
{
|
| 1755 |
+
"epoch": 9.694973157637872,
|
| 1756 |
+
"eval_runtime": 19.6301,
|
| 1757 |
+
"eval_samples_per_second": 22.007,
|
| 1758 |
+
"eval_steps_per_second": 5.502,
|
| 1759 |
+
"step": 1250
|
| 1760 |
+
},
|
| 1761 |
+
{
|
| 1762 |
+
"epoch": 9.773060029282577,
|
| 1763 |
+
"grad_norm": 40032.71484375,
|
| 1764 |
+
"learning_rate": 1.5625e-06,
|
| 1765 |
+
"loss": 239.199,
|
| 1766 |
+
"step": 1260
|
| 1767 |
+
},
|
| 1768 |
+
{
|
| 1769 |
+
"epoch": 9.773060029282577,
|
| 1770 |
+
"eval_runtime": 19.6402,
|
| 1771 |
+
"eval_samples_per_second": 21.996,
|
| 1772 |
+
"eval_steps_per_second": 5.499,
|
| 1773 |
+
"step": 1260
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"epoch": 9.851146900927281,
|
| 1777 |
+
"grad_norm": 77447.0546875,
|
| 1778 |
+
"learning_rate": 7.8125e-07,
|
| 1779 |
+
"loss": 222.1013,
|
| 1780 |
+
"step": 1270
|
| 1781 |
+
},
|
| 1782 |
+
{
|
| 1783 |
+
"epoch": 9.851146900927281,
|
| 1784 |
+
"eval_runtime": 19.623,
|
| 1785 |
+
"eval_samples_per_second": 22.015,
|
| 1786 |
+
"eval_steps_per_second": 5.504,
|
| 1787 |
+
"step": 1270
|
| 1788 |
+
},
|
| 1789 |
+
{
|
| 1790 |
+
"epoch": 9.929233772571987,
|
| 1791 |
+
"grad_norm": 100750.3046875,
|
| 1792 |
+
"learning_rate": 0.0,
|
| 1793 |
+
"loss": 234.0448,
|
| 1794 |
+
"step": 1280
|
| 1795 |
+
},
|
| 1796 |
+
{
|
| 1797 |
+
"epoch": 9.929233772571987,
|
| 1798 |
+
"eval_runtime": 19.621,
|
| 1799 |
+
"eval_samples_per_second": 22.017,
|
| 1800 |
+
"eval_steps_per_second": 5.504,
|
| 1801 |
+
"step": 1280
|
| 1802 |
}
|
| 1803 |
],
|
| 1804 |
"logging_steps": 10,
|
|
|
|
| 1813 |
"should_evaluate": false,
|
| 1814 |
"should_log": false,
|
| 1815 |
"should_save": true,
|
| 1816 |
+
"should_training_stop": true
|
| 1817 |
},
|
| 1818 |
"attributes": {}
|
| 1819 |
}
|