Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a0635af77a0cc188290373b523679a526e8c1ad75d8c7f25b922fea9cf62dca
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bf901e73d41ea0ccfa16c6684d24d585b1f05ef9f57a4524e8694a26b9940a3
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b950c496923243d0912e84a1eb84cb370513a822a8463c038d022e062e515bc
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b4e6f78506ab09c8a3ff311a2dfeb7a6190585c49701c5bd2fcc763c2b50448
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2a8c76f206a59cf071ae08cd5fd0af4dd2719f5d4bfbabc67d57af26f56bb51
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dca9e86a8e1c5a776637e4448f160d7ddd557ac645e78f5bee2fc465ee6ebde3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 9500,
|
| 3 |
"best_metric": 4.357193470001221,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-9500",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1642,6 +1642,92 @@
|
|
| 1642 |
"eval_samples_per_second": 53.656,
|
| 1643 |
"eval_steps_per_second": 13.414,
|
| 1644 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1645 |
}
|
| 1646 |
],
|
| 1647 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 9500,
|
| 3 |
"best_metric": 4.357193470001221,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-9500",
|
| 5 |
+
"epoch": 0.7691716021844474,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1642 |
"eval_samples_per_second": 53.656,
|
| 1643 |
"eval_steps_per_second": 13.414,
|
| 1644 |
"step": 9500
|
| 1645 |
+
},
|
| 1646 |
+
{
|
| 1647 |
+
"epoch": 0.7345588800861472,
|
| 1648 |
+
"grad_norm": 3.8532371520996094,
|
| 1649 |
+
"learning_rate": 0.0001530426200555801,
|
| 1650 |
+
"loss": 4.3473,
|
| 1651 |
+
"step": 9550
|
| 1652 |
+
},
|
| 1653 |
+
{
|
| 1654 |
+
"epoch": 0.7384047380970694,
|
| 1655 |
+
"grad_norm": 4.654659271240234,
|
| 1656 |
+
"learning_rate": 0.0001527829000337636,
|
| 1657 |
+
"loss": 4.457,
|
| 1658 |
+
"step": 9600
|
| 1659 |
+
},
|
| 1660 |
+
{
|
| 1661 |
+
"epoch": 0.7422505961079917,
|
| 1662 |
+
"grad_norm": 2.420182228088379,
|
| 1663 |
+
"learning_rate": 0.0001525231800119471,
|
| 1664 |
+
"loss": 4.4521,
|
| 1665 |
+
"step": 9650
|
| 1666 |
+
},
|
| 1667 |
+
{
|
| 1668 |
+
"epoch": 0.7460964541189139,
|
| 1669 |
+
"grad_norm": 4.189414978027344,
|
| 1670 |
+
"learning_rate": 0.00015226345999013065,
|
| 1671 |
+
"loss": 4.2569,
|
| 1672 |
+
"step": 9700
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 0.7499423121298362,
|
| 1676 |
+
"grad_norm": 2.824084997177124,
|
| 1677 |
+
"learning_rate": 0.00015200373996831416,
|
| 1678 |
+
"loss": 4.3409,
|
| 1679 |
+
"step": 9750
|
| 1680 |
+
},
|
| 1681 |
+
{
|
| 1682 |
+
"epoch": 0.7499423121298362,
|
| 1683 |
+
"eval_loss": 4.378731727600098,
|
| 1684 |
+
"eval_runtime": 18.4857,
|
| 1685 |
+
"eval_samples_per_second": 54.096,
|
| 1686 |
+
"eval_steps_per_second": 13.524,
|
| 1687 |
+
"step": 9750
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 0.7537881701407584,
|
| 1691 |
+
"grad_norm": 6.379781723022461,
|
| 1692 |
+
"learning_rate": 0.00015174401994649768,
|
| 1693 |
+
"loss": 4.3041,
|
| 1694 |
+
"step": 9800
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 0.7576340281516807,
|
| 1698 |
+
"grad_norm": 1.7334113121032715,
|
| 1699 |
+
"learning_rate": 0.00015148429992468122,
|
| 1700 |
+
"loss": 4.3096,
|
| 1701 |
+
"step": 9850
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"epoch": 0.7614798861626029,
|
| 1705 |
+
"grad_norm": 4.287415027618408,
|
| 1706 |
+
"learning_rate": 0.00015122457990286473,
|
| 1707 |
+
"loss": 4.4411,
|
| 1708 |
+
"step": 9900
|
| 1709 |
+
},
|
| 1710 |
+
{
|
| 1711 |
+
"epoch": 0.7653257441735252,
|
| 1712 |
+
"grad_norm": 3.3184821605682373,
|
| 1713 |
+
"learning_rate": 0.00015096485988104821,
|
| 1714 |
+
"loss": 4.3992,
|
| 1715 |
+
"step": 9950
|
| 1716 |
+
},
|
| 1717 |
+
{
|
| 1718 |
+
"epoch": 0.7691716021844474,
|
| 1719 |
+
"grad_norm": 4.698968887329102,
|
| 1720 |
+
"learning_rate": 0.00015070513985923175,
|
| 1721 |
+
"loss": 4.4726,
|
| 1722 |
+
"step": 10000
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 0.7691716021844474,
|
| 1726 |
+
"eval_loss": 4.408615589141846,
|
| 1727 |
+
"eval_runtime": 18.5408,
|
| 1728 |
+
"eval_samples_per_second": 53.935,
|
| 1729 |
+
"eval_steps_per_second": 13.484,
|
| 1730 |
+
"step": 10000
|
| 1731 |
}
|
| 1732 |
],
|
| 1733 |
"logging_steps": 50,
|