Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbda29e3fc4574d26f4c2d88945024a17cf0a9095cfd76acea4713a70561f4b9
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f807c16f8bec038522bfe252b213fc1087e8a39fb1f495399cdf7ee1d92e00a6
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b4e6f78506ab09c8a3ff311a2dfeb7a6190585c49701c5bd2fcc763c2b50448
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5057516ff1b0b207608fc5bf21c504e3b16c8f39cb674a438cf642593270922
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf5349680f22d749c8735deb8f593d381f787d0e5d89e99661139aee18144bbf
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 9500,
|
| 3 |
"best_metric": 4.328299045562744,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-9500",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1642,6 +1642,92 @@
|
|
| 1642 |
"eval_samples_per_second": 53.014,
|
| 1643 |
"eval_steps_per_second": 13.253,
|
| 1644 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1645 |
}
|
| 1646 |
],
|
| 1647 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 9500,
|
| 3 |
"best_metric": 4.328299045562744,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-9500",
|
| 5 |
+
"epoch": 0.7691716021844474,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1642 |
"eval_samples_per_second": 53.014,
|
| 1643 |
"eval_steps_per_second": 13.253,
|
| 1644 |
"step": 9500
|
| 1645 |
+
},
|
| 1646 |
+
{
|
| 1647 |
+
"epoch": 0.7345588800861472,
|
| 1648 |
+
"grad_norm": 3.9685990810394287,
|
| 1649 |
+
"learning_rate": 9.141355097239072e-05,
|
| 1650 |
+
"loss": 4.325,
|
| 1651 |
+
"step": 9550
|
| 1652 |
+
},
|
| 1653 |
+
{
|
| 1654 |
+
"epoch": 0.7384047380970694,
|
| 1655 |
+
"grad_norm": 5.303285121917725,
|
| 1656 |
+
"learning_rate": 9.11562074923716e-05,
|
| 1657 |
+
"loss": 4.4277,
|
| 1658 |
+
"step": 9600
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"epoch": 0.7422505961079917,
|
| 1662 |
+
"grad_norm": 2.70599627494812,
|
| 1663 |
+
"learning_rate": 9.089886401235249e-05,
|
| 1664 |
+
"loss": 4.4329,
|
| 1665 |
+
"step": 9650
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"epoch": 0.7460964541189139,
|
| 1669 |
+
"grad_norm": 4.711449146270752,
|
| 1670 |
+
"learning_rate": 9.064152053233336e-05,
|
| 1671 |
+
"loss": 4.251,
|
| 1672 |
+
"step": 9700
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 0.7499423121298362,
|
| 1676 |
+
"grad_norm": 3.0169851779937744,
|
| 1677 |
+
"learning_rate": 9.038417705231424e-05,
|
| 1678 |
+
"loss": 4.3483,
|
| 1679 |
+
"step": 9750
|
| 1680 |
+
},
|
| 1681 |
+
{
|
| 1682 |
+
"epoch": 0.7499423121298362,
|
| 1683 |
+
"eval_loss": 4.341108322143555,
|
| 1684 |
+
"eval_runtime": 18.9063,
|
| 1685 |
+
"eval_samples_per_second": 52.893,
|
| 1686 |
+
"eval_steps_per_second": 13.223,
|
| 1687 |
+
"step": 9750
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 0.7537881701407584,
|
| 1691 |
+
"grad_norm": 3.375880002975464,
|
| 1692 |
+
"learning_rate": 9.012683357229513e-05,
|
| 1693 |
+
"loss": 4.313,
|
| 1694 |
+
"step": 9800
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 0.7576340281516807,
|
| 1698 |
+
"grad_norm": 1.707850456237793,
|
| 1699 |
+
"learning_rate": 8.986949009227601e-05,
|
| 1700 |
+
"loss": 4.3062,
|
| 1701 |
+
"step": 9850
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"epoch": 0.7614798861626029,
|
| 1705 |
+
"grad_norm": 3.6718738079071045,
|
| 1706 |
+
"learning_rate": 8.96121466122569e-05,
|
| 1707 |
+
"loss": 4.4415,
|
| 1708 |
+
"step": 9900
|
| 1709 |
+
},
|
| 1710 |
+
{
|
| 1711 |
+
"epoch": 0.7653257441735252,
|
| 1712 |
+
"grad_norm": 3.5382699966430664,
|
| 1713 |
+
"learning_rate": 8.935480313223778e-05,
|
| 1714 |
+
"loss": 4.3754,
|
| 1715 |
+
"step": 9950
|
| 1716 |
+
},
|
| 1717 |
+
{
|
| 1718 |
+
"epoch": 0.7691716021844474,
|
| 1719 |
+
"grad_norm": 4.678229808807373,
|
| 1720 |
+
"learning_rate": 8.909745965221865e-05,
|
| 1721 |
+
"loss": 4.4404,
|
| 1722 |
+
"step": 10000
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 0.7691716021844474,
|
| 1726 |
+
"eval_loss": 4.3746819496154785,
|
| 1727 |
+
"eval_runtime": 18.7221,
|
| 1728 |
+
"eval_samples_per_second": 53.413,
|
| 1729 |
+
"eval_steps_per_second": 13.353,
|
| 1730 |
+
"step": 10000
|
| 1731 |
}
|
| 1732 |
],
|
| 1733 |
"logging_steps": 50,
|