Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1150/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0762ec721b93d1a0e10ada578c7538ccb87f010928b297b4505a645b3aec697
|
| 3 |
size 98088784
|
last-checkpoint/global_step1150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa1fd94ec52151cc8ac9118abed11c7a3e7a5973cd11ebe89a119398b424d040
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c28d9686a61da27a86d8432e4c1c6f8448d185febb9dc62e63da04f25aef4400
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ae989bbd465e0ea99a6c57870c991c867715b51aa2fce0ea3fb262bedbf097a
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb6576ebf655c7cbdf4585f340cae9ee67e706b6193cc4a03c863634805807ac
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1150/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9590c7cd283fe8a45bd69e18382a39acaeba5bc967eccf83e04a9c12c1af5ea8
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1150
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32e2c783f044e208693875b6618820b4692ab8369227ed5fcfe75de8c98cb2f5
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9a009ec584589b323bfde6fb332132397a948a68665dbf47ae6b13108a76ac8
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9eba47f1f3f2aaeb1ee30212c3d28966395e9b15ce04d718f220251a1b885544
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:969e35a2eee24aa5d0640e276157b14ed3586e426e68f6139c80b9bdb3012f62
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f599b3f2fdaee9f298de483bc342667a86479cffdd08dfb05aebfb998561b471
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1724,6 +1724,84 @@
|
|
| 1724 |
"eval_samples_per_second": 125.981,
|
| 1725 |
"eval_steps_per_second": 15.756,
|
| 1726 |
"step": 1100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1727 |
}
|
| 1728 |
],
|
| 1729 |
"logging_steps": 5,
|
|
@@ -1752,7 +1830,7 @@
|
|
| 1752 |
"attributes": {}
|
| 1753 |
}
|
| 1754 |
},
|
| 1755 |
-
"total_flos": 5.
|
| 1756 |
"train_batch_size": 2,
|
| 1757 |
"trial_name": null,
|
| 1758 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6847204566001892,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.127372933251684,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1150,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1724 |
"eval_samples_per_second": 125.981,
|
| 1725 |
"eval_steps_per_second": 15.756,
|
| 1726 |
"step": 1100
|
| 1727 |
+
},
|
| 1728 |
+
{
|
| 1729 |
+
"epoch": 1.0832823025107166,
|
| 1730 |
+
"grad_norm": 0.2049368917942047,
|
| 1731 |
+
"learning_rate": 9.97107409377544e-05,
|
| 1732 |
+
"loss": 0.7052,
|
| 1733 |
+
"step": 1105
|
| 1734 |
+
},
|
| 1735 |
+
{
|
| 1736 |
+
"epoch": 1.088181261481935,
|
| 1737 |
+
"grad_norm": 0.2253541499376297,
|
| 1738 |
+
"learning_rate": 9.960392926073467e-05,
|
| 1739 |
+
"loss": 0.7028,
|
| 1740 |
+
"step": 1110
|
| 1741 |
+
},
|
| 1742 |
+
{
|
| 1743 |
+
"epoch": 1.0930802204531538,
|
| 1744 |
+
"grad_norm": 0.2347995936870575,
|
| 1745 |
+
"learning_rate": 9.949666330595961e-05,
|
| 1746 |
+
"loss": 0.7055,
|
| 1747 |
+
"step": 1115
|
| 1748 |
+
},
|
| 1749 |
+
{
|
| 1750 |
+
"epoch": 1.0979791794243723,
|
| 1751 |
+
"grad_norm": 0.21330611407756805,
|
| 1752 |
+
"learning_rate": 9.938894417575287e-05,
|
| 1753 |
+
"loss": 0.7326,
|
| 1754 |
+
"step": 1120
|
| 1755 |
+
},
|
| 1756 |
+
{
|
| 1757 |
+
"epoch": 1.102878138395591,
|
| 1758 |
+
"grad_norm": 0.20777581632137299,
|
| 1759 |
+
"learning_rate": 9.928077297709514e-05,
|
| 1760 |
+
"loss": 0.7198,
|
| 1761 |
+
"step": 1125
|
| 1762 |
+
},
|
| 1763 |
+
{
|
| 1764 |
+
"epoch": 1.1077770973668095,
|
| 1765 |
+
"grad_norm": 0.22546184062957764,
|
| 1766 |
+
"learning_rate": 9.91721508216129e-05,
|
| 1767 |
+
"loss": 0.6848,
|
| 1768 |
+
"step": 1130
|
| 1769 |
+
},
|
| 1770 |
+
{
|
| 1771 |
+
"epoch": 1.1126760563380282,
|
| 1772 |
+
"grad_norm": 0.22367283701896667,
|
| 1773 |
+
"learning_rate": 9.90630788255668e-05,
|
| 1774 |
+
"loss": 0.7067,
|
| 1775 |
+
"step": 1135
|
| 1776 |
+
},
|
| 1777 |
+
{
|
| 1778 |
+
"epoch": 1.1175750153092467,
|
| 1779 |
+
"grad_norm": 0.2060408741235733,
|
| 1780 |
+
"learning_rate": 9.895355810984042e-05,
|
| 1781 |
+
"loss": 0.7032,
|
| 1782 |
+
"step": 1140
|
| 1783 |
+
},
|
| 1784 |
+
{
|
| 1785 |
+
"epoch": 1.1224739742804655,
|
| 1786 |
+
"grad_norm": 0.22378048300743103,
|
| 1787 |
+
"learning_rate": 9.884358979992852e-05,
|
| 1788 |
+
"loss": 0.7039,
|
| 1789 |
+
"step": 1145
|
| 1790 |
+
},
|
| 1791 |
+
{
|
| 1792 |
+
"epoch": 1.127372933251684,
|
| 1793 |
+
"grad_norm": 0.22920195758342743,
|
| 1794 |
+
"learning_rate": 9.873317502592563e-05,
|
| 1795 |
+
"loss": 0.6932,
|
| 1796 |
+
"step": 1150
|
| 1797 |
+
},
|
| 1798 |
+
{
|
| 1799 |
+
"epoch": 1.127372933251684,
|
| 1800 |
+
"eval_loss": 0.6847204566001892,
|
| 1801 |
+
"eval_runtime": 15.5644,
|
| 1802 |
+
"eval_samples_per_second": 125.864,
|
| 1803 |
+
"eval_steps_per_second": 15.741,
|
| 1804 |
+
"step": 1150
|
| 1805 |
}
|
| 1806 |
],
|
| 1807 |
"logging_steps": 5,
|
|
|
|
| 1830 |
"attributes": {}
|
| 1831 |
}
|
| 1832 |
},
|
| 1833 |
+
"total_flos": 5.936913961881436e+17,
|
| 1834 |
"train_batch_size": 2,
|
| 1835 |
"trial_name": null,
|
| 1836 |
"trial_params": null
|