Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1100/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44441ec494dd1eeddb4f4b1f003d97643c00cca698aa10a2254f4b4bdacb8704
|
| 3 |
size 98088784
|
last-checkpoint/global_step1100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dee808df81fc01618f166dd9adc8f410006ed9e8e5bdfbe48da6338752ec172
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff19cac0e35b930e98f201f70574798e72b0216e01348755c4e6a66033319aa5
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:392b29b2fbff6661a95bc8e5314c5f3f6d23ecd4140810512c3f17d4227567a0
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:126b4cb8d8675f0b95af0b9b07199ccc0b35045b0f45a165f3677b5406ba7b15
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1100/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:472d3069e30cd1792f3b039921a033eb3a5631d786c055af3ce407fa6487cfb6
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1100
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ceb0ba0fefc4682de8ae9d502be348b266aef51d2c517ea10d576e3957cf16e
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7f0589f852327dcbb3a04372c5c9b3b3aed87183a18e4e78c8842af6ccc94ea
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f4aa42ba29fbaf89d327737bbdbe96fa7085e909f789a4b592724ea39fd0491
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ed1a940d9e87126bc4746d90070268ad6d65dcc8b4794a5c83d93738db2dc6b
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41aec0a0f7fd8e266c974eb692fe1a8c668e3b6745d80b43c921e581b091927b
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1646,6 +1646,84 @@
|
|
| 1646 |
"eval_samples_per_second": 126.592,
|
| 1647 |
"eval_steps_per_second": 15.832,
|
| 1648 |
"step": 1050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1649 |
}
|
| 1650 |
],
|
| 1651 |
"logging_steps": 5,
|
|
@@ -1674,7 +1752,7 @@
|
|
| 1674 |
"attributes": {}
|
| 1675 |
}
|
| 1676 |
},
|
| 1677 |
-
"total_flos": 5.
|
| 1678 |
"train_batch_size": 2,
|
| 1679 |
"trial_name": null,
|
| 1680 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6880703568458557,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0783833435394978,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1646 |
"eval_samples_per_second": 126.592,
|
| 1647 |
"eval_steps_per_second": 15.832,
|
| 1648 |
"step": 1050
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"epoch": 1.0342927127985304,
|
| 1652 |
+
"grad_norm": 0.22498337924480438,
|
| 1653 |
+
"learning_rate": 0.0001007536332725504,
|
| 1654 |
+
"loss": 0.7153,
|
| 1655 |
+
"step": 1055
|
| 1656 |
+
},
|
| 1657 |
+
{
|
| 1658 |
+
"epoch": 1.039191671769749,
|
| 1659 |
+
"grad_norm": 0.19475223124027252,
|
| 1660 |
+
"learning_rate": 0.00010065142396828989,
|
| 1661 |
+
"loss": 0.6969,
|
| 1662 |
+
"step": 1060
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 1.0440906307409676,
|
| 1666 |
+
"grad_norm": 0.20079198479652405,
|
| 1667 |
+
"learning_rate": 0.00010054874962164521,
|
| 1668 |
+
"loss": 0.6906,
|
| 1669 |
+
"step": 1065
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"epoch": 1.0489895897121861,
|
| 1673 |
+
"grad_norm": 0.18500946462154388,
|
| 1674 |
+
"learning_rate": 0.00010044561128775412,
|
| 1675 |
+
"loss": 0.7027,
|
| 1676 |
+
"step": 1070
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 1.0538885486834049,
|
| 1680 |
+
"grad_norm": 0.18668654561042786,
|
| 1681 |
+
"learning_rate": 0.0001003420100265226,
|
| 1682 |
+
"loss": 0.7157,
|
| 1683 |
+
"step": 1075
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"epoch": 1.0587875076546234,
|
| 1687 |
+
"grad_norm": 0.21674495935440063,
|
| 1688 |
+
"learning_rate": 0.00010023794690261389,
|
| 1689 |
+
"loss": 0.7208,
|
| 1690 |
+
"step": 1080
|
| 1691 |
+
},
|
| 1692 |
+
{
|
| 1693 |
+
"epoch": 1.063686466625842,
|
| 1694 |
+
"grad_norm": 0.20600494742393494,
|
| 1695 |
+
"learning_rate": 0.0001001334229854376,
|
| 1696 |
+
"loss": 0.6957,
|
| 1697 |
+
"step": 1085
|
| 1698 |
+
},
|
| 1699 |
+
{
|
| 1700 |
+
"epoch": 1.0685854255970606,
|
| 1701 |
+
"grad_norm": 0.2198040932416916,
|
| 1702 |
+
"learning_rate": 0.0001000284393491387,
|
| 1703 |
+
"loss": 0.7059,
|
| 1704 |
+
"step": 1090
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 1.0734843845682793,
|
| 1708 |
+
"grad_norm": 0.225518599152565,
|
| 1709 |
+
"learning_rate": 9.99229970725865e-05,
|
| 1710 |
+
"loss": 0.7017,
|
| 1711 |
+
"step": 1095
|
| 1712 |
+
},
|
| 1713 |
+
{
|
| 1714 |
+
"epoch": 1.0783833435394978,
|
| 1715 |
+
"grad_norm": 0.2226964235305786,
|
| 1716 |
+
"learning_rate": 9.981709723936353e-05,
|
| 1717 |
+
"loss": 0.6967,
|
| 1718 |
+
"step": 1100
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 1.0783833435394978,
|
| 1722 |
+
"eval_loss": 0.6880703568458557,
|
| 1723 |
+
"eval_runtime": 15.55,
|
| 1724 |
+
"eval_samples_per_second": 125.981,
|
| 1725 |
+
"eval_steps_per_second": 15.756,
|
| 1726 |
+
"step": 1100
|
| 1727 |
}
|
| 1728 |
],
|
| 1729 |
"logging_steps": 5,
|
|
|
|
| 1752 |
"attributes": {}
|
| 1753 |
}
|
| 1754 |
},
|
| 1755 |
+
"total_flos": 5.6795728734846976e+17,
|
| 1756 |
"train_batch_size": 2,
|
| 1757 |
"trial_name": null,
|
| 1758 |
"trial_params": null
|