Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb0e3c8eeaeaaeed92b7974a796b8555d972ab04a230ed8a0aa39040f919522e
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbc9386aa433cf09ea700ba9554d99ebbf74421553fda8c6e9f15d579ffea58d
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:845a998395b0eed3947658b954a32b49a265bc7bcb21586d38b38bbd5b7b0ca3
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45788da03bae97e95c3537137dc4032cafd5cea57659195759c4661fda86a92e
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59deba323ff65998dc2bdf22d9e342db9cffc30bd0e70e0e297b662f8184f414
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab5aa9661d156b69bcd5976a8b5f6e5eaf3f91f473a83c47e1ba991c358f74ef
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1642,6 +1642,92 @@
|
|
| 1642 |
"eval_samples_per_second": 59.205,
|
| 1643 |
"eval_steps_per_second": 14.801,
|
| 1644 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1645 |
}
|
| 1646 |
],
|
| 1647 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 10000,
|
| 3 |
+
"best_metric": 1.36810302734375,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-10000",
|
| 5 |
+
"epoch": 0.7691716021844474,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1642 |
"eval_samples_per_second": 59.205,
|
| 1643 |
"eval_steps_per_second": 14.801,
|
| 1644 |
"step": 9500
|
| 1645 |
+
},
|
| 1646 |
+
{
|
| 1647 |
+
"epoch": 0.7345588800861472,
|
| 1648 |
+
"grad_norm": 1.0704309940338135,
|
| 1649 |
+
"learning_rate": 0.00015302184245383479,
|
| 1650 |
+
"loss": 1.3428,
|
| 1651 |
+
"step": 9550
|
| 1652 |
+
},
|
| 1653 |
+
{
|
| 1654 |
+
"epoch": 0.7384047380970694,
|
| 1655 |
+
"grad_norm": 1.3625832796096802,
|
| 1656 |
+
"learning_rate": 0.00015276212243201827,
|
| 1657 |
+
"loss": 1.3911,
|
| 1658 |
+
"step": 9600
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"epoch": 0.7422505961079917,
|
| 1662 |
+
"grad_norm": 0.836138129234314,
|
| 1663 |
+
"learning_rate": 0.0001525024024102018,
|
| 1664 |
+
"loss": 1.3729,
|
| 1665 |
+
"step": 9650
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"epoch": 0.7460964541189139,
|
| 1669 |
+
"grad_norm": 1.6029527187347412,
|
| 1670 |
+
"learning_rate": 0.00015224268238838532,
|
| 1671 |
+
"loss": 1.3314,
|
| 1672 |
+
"step": 9700
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 0.7499423121298362,
|
| 1676 |
+
"grad_norm": 0.9241604804992676,
|
| 1677 |
+
"learning_rate": 0.00015198296236656884,
|
| 1678 |
+
"loss": 1.3965,
|
| 1679 |
+
"step": 9750
|
| 1680 |
+
},
|
| 1681 |
+
{
|
| 1682 |
+
"epoch": 0.7499423121298362,
|
| 1683 |
+
"eval_loss": 1.3779255151748657,
|
| 1684 |
+
"eval_runtime": 17.0161,
|
| 1685 |
+
"eval_samples_per_second": 58.768,
|
| 1686 |
+
"eval_steps_per_second": 14.692,
|
| 1687 |
+
"step": 9750
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 0.7537881701407584,
|
| 1691 |
+
"grad_norm": 1.1483672857284546,
|
| 1692 |
+
"learning_rate": 0.00015172324234475238,
|
| 1693 |
+
"loss": 1.3462,
|
| 1694 |
+
"step": 9800
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 0.7576340281516807,
|
| 1698 |
+
"grad_norm": 0.5047497153282166,
|
| 1699 |
+
"learning_rate": 0.0001514635223229359,
|
| 1700 |
+
"loss": 1.364,
|
| 1701 |
+
"step": 9850
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"epoch": 0.7614798861626029,
|
| 1705 |
+
"grad_norm": 1.240676760673523,
|
| 1706 |
+
"learning_rate": 0.0001512038023011194,
|
| 1707 |
+
"loss": 1.3563,
|
| 1708 |
+
"step": 9900
|
| 1709 |
+
},
|
| 1710 |
+
{
|
| 1711 |
+
"epoch": 0.7653257441735252,
|
| 1712 |
+
"grad_norm": 1.3579591512680054,
|
| 1713 |
+
"learning_rate": 0.00015094408227930292,
|
| 1714 |
+
"loss": 1.3661,
|
| 1715 |
+
"step": 9950
|
| 1716 |
+
},
|
| 1717 |
+
{
|
| 1718 |
+
"epoch": 0.7691716021844474,
|
| 1719 |
+
"grad_norm": 1.318456768989563,
|
| 1720 |
+
"learning_rate": 0.00015068436225748643,
|
| 1721 |
+
"loss": 1.4326,
|
| 1722 |
+
"step": 10000
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 0.7691716021844474,
|
| 1726 |
+
"eval_loss": 1.36810302734375,
|
| 1727 |
+
"eval_runtime": 16.8388,
|
| 1728 |
+
"eval_samples_per_second": 59.387,
|
| 1729 |
+
"eval_steps_per_second": 14.847,
|
| 1730 |
+
"step": 10000
|
| 1731 |
}
|
| 1732 |
],
|
| 1733 |
"logging_steps": 50,
|