Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42f1ca9dac6b6ebcdce4b00359a28210be4ba525244b591572f5b2287e25b0f9
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93ef9d4a1c4c97b75d44e8a9d7ebd61f014e4cbe01f8111a09cb70d30d2060b0
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c0943241d3c12af607553b4e6abe354fb336ff905d2c1fb46e1651cbdb980d6
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:233660262675d93c5fec35f1803b0486ea65b8e55d6ffa01a964eeb9acc8c92f
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:250db0f613d9618dc37866ea93a85951fa852bff7fd15eb41a9bb89d17f93c5f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd971f0e9f9fab99e52eb560d68aa87eb93fac7faa2dc0c68c4d5d10aa759271
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1642,6 +1642,92 @@
|
|
| 1642 |
"eval_samples_per_second": 58.852,
|
| 1643 |
"eval_steps_per_second": 14.713,
|
| 1644 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1645 |
}
|
| 1646 |
],
|
| 1647 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 10000,
|
| 3 |
+
"best_metric": 1.8548085689544678,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-10000",
|
| 5 |
+
"epoch": 0.7691716021844474,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1642 |
"eval_samples_per_second": 58.852,
|
| 1643 |
"eval_steps_per_second": 14.713,
|
| 1644 |
"step": 9500
|
| 1645 |
+
},
|
| 1646 |
+
{
|
| 1647 |
+
"epoch": 0.7345588800861472,
|
| 1648 |
+
"grad_norm": 1.6844549179077148,
|
| 1649 |
+
"learning_rate": 0.00015302703685427112,
|
| 1650 |
+
"loss": 1.8262,
|
| 1651 |
+
"step": 9550
|
| 1652 |
+
},
|
| 1653 |
+
{
|
| 1654 |
+
"epoch": 0.7384047380970694,
|
| 1655 |
+
"grad_norm": 2.512157678604126,
|
| 1656 |
+
"learning_rate": 0.0001527673168324546,
|
| 1657 |
+
"loss": 1.9319,
|
| 1658 |
+
"step": 9600
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"epoch": 0.7422505961079917,
|
| 1662 |
+
"grad_norm": 1.3238016366958618,
|
| 1663 |
+
"learning_rate": 0.00015250759681063812,
|
| 1664 |
+
"loss": 1.9001,
|
| 1665 |
+
"step": 9650
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"epoch": 0.7460964541189139,
|
| 1669 |
+
"grad_norm": 1.83181631565094,
|
| 1670 |
+
"learning_rate": 0.00015224787678882166,
|
| 1671 |
+
"loss": 1.8324,
|
| 1672 |
+
"step": 9700
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 0.7499423121298362,
|
| 1676 |
+
"grad_norm": 1.6106966733932495,
|
| 1677 |
+
"learning_rate": 0.00015198815676700518,
|
| 1678 |
+
"loss": 1.9535,
|
| 1679 |
+
"step": 9750
|
| 1680 |
+
},
|
| 1681 |
+
{
|
| 1682 |
+
"epoch": 0.7499423121298362,
|
| 1683 |
+
"eval_loss": 1.873831033706665,
|
| 1684 |
+
"eval_runtime": 17.0343,
|
| 1685 |
+
"eval_samples_per_second": 58.705,
|
| 1686 |
+
"eval_steps_per_second": 14.676,
|
| 1687 |
+
"step": 9750
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 0.7537881701407584,
|
| 1691 |
+
"grad_norm": 2.3586697578430176,
|
| 1692 |
+
"learning_rate": 0.0001517284367451887,
|
| 1693 |
+
"loss": 1.858,
|
| 1694 |
+
"step": 9800
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 0.7576340281516807,
|
| 1698 |
+
"grad_norm": 0.7499716877937317,
|
| 1699 |
+
"learning_rate": 0.00015146871672337223,
|
| 1700 |
+
"loss": 1.866,
|
| 1701 |
+
"step": 9850
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"epoch": 0.7614798861626029,
|
| 1705 |
+
"grad_norm": 1.3573709726333618,
|
| 1706 |
+
"learning_rate": 0.00015120899670155574,
|
| 1707 |
+
"loss": 1.8636,
|
| 1708 |
+
"step": 9900
|
| 1709 |
+
},
|
| 1710 |
+
{
|
| 1711 |
+
"epoch": 0.7653257441735252,
|
| 1712 |
+
"grad_norm": 2.271859884262085,
|
| 1713 |
+
"learning_rate": 0.00015094927667973923,
|
| 1714 |
+
"loss": 1.8625,
|
| 1715 |
+
"step": 9950
|
| 1716 |
+
},
|
| 1717 |
+
{
|
| 1718 |
+
"epoch": 0.7691716021844474,
|
| 1719 |
+
"grad_norm": 1.8813310861587524,
|
| 1720 |
+
"learning_rate": 0.00015068955665792277,
|
| 1721 |
+
"loss": 1.9318,
|
| 1722 |
+
"step": 10000
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 0.7691716021844474,
|
| 1726 |
+
"eval_loss": 1.8548085689544678,
|
| 1727 |
+
"eval_runtime": 17.0223,
|
| 1728 |
+
"eval_samples_per_second": 58.746,
|
| 1729 |
+
"eval_steps_per_second": 14.687,
|
| 1730 |
+
"step": 10000
|
| 1731 |
}
|
| 1732 |
],
|
| 1733 |
"logging_steps": 50,
|