Training in progress, step 5000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccc114460bdc62c626ba50c920e272e7f575d8aeb0e2543e39fc6ee05fa17062
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50d17eabb10066129b6714ff713807dcb69661d9ef6abc8937da238409918435
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f41eb4686c9fcfd89bdeef4e240aa2cba7d92d7862f3e09543657417b8a31b1f
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3f90304776c4344263775ad85fa55a101daf353f68d13a5872b9fc01a14e2b3
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:429536711957c0df9399d6c91b39d43aa4faf1c40a0c6bae8287d373fb9daa66
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17a4cf673b793da03b6c2fcc104b7095156a3747182923bcd50f7b981d959c40
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -782,6 +782,92 @@
|
|
| 782 |
"eval_samples_per_second": 59.456,
|
| 783 |
"eval_steps_per_second": 14.864,
|
| 784 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
}
|
| 786 |
],
|
| 787 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 5000,
|
| 3 |
+
"best_metric": 1.4847265481948853,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-5000",
|
| 5 |
+
"epoch": 0.3845858010922237,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 5000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 782 |
"eval_samples_per_second": 59.456,
|
| 783 |
"eval_steps_per_second": 14.864,
|
| 784 |
"step": 4500
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"epoch": 0.34997307899392355,
|
| 788 |
+
"grad_norm": 1.2649528980255127,
|
| 789 |
+
"learning_rate": 0.00017898865023504662,
|
| 790 |
+
"loss": 1.551,
|
| 791 |
+
"step": 4550
|
| 792 |
+
},
|
| 793 |
+
{
|
| 794 |
+
"epoch": 0.3538189370048458,
|
| 795 |
+
"grad_norm": 1.403538703918457,
|
| 796 |
+
"learning_rate": 0.00017872893021323013,
|
| 797 |
+
"loss": 1.5004,
|
| 798 |
+
"step": 4600
|
| 799 |
+
},
|
| 800 |
+
{
|
| 801 |
+
"epoch": 0.35766479501576803,
|
| 802 |
+
"grad_norm": 1.5697741508483887,
|
| 803 |
+
"learning_rate": 0.00017846921019141367,
|
| 804 |
+
"loss": 1.5249,
|
| 805 |
+
"step": 4650
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 0.3615106530266903,
|
| 809 |
+
"grad_norm": 1.1718928813934326,
|
| 810 |
+
"learning_rate": 0.0001782094901695972,
|
| 811 |
+
"loss": 1.4532,
|
| 812 |
+
"step": 4700
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 0.36535651103761246,
|
| 816 |
+
"grad_norm": 1.1711490154266357,
|
| 817 |
+
"learning_rate": 0.0001779497701477807,
|
| 818 |
+
"loss": 1.4466,
|
| 819 |
+
"step": 4750
|
| 820 |
+
},
|
| 821 |
+
{
|
| 822 |
+
"epoch": 0.36535651103761246,
|
| 823 |
+
"eval_loss": 1.4999897480010986,
|
| 824 |
+
"eval_runtime": 16.924,
|
| 825 |
+
"eval_samples_per_second": 59.088,
|
| 826 |
+
"eval_steps_per_second": 14.772,
|
| 827 |
+
"step": 4750
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 0.3692023690485347,
|
| 831 |
+
"grad_norm": 1.130150556564331,
|
| 832 |
+
"learning_rate": 0.0001776900501259642,
|
| 833 |
+
"loss": 1.5159,
|
| 834 |
+
"step": 4800
|
| 835 |
+
},
|
| 836 |
+
{
|
| 837 |
+
"epoch": 0.37304822705945695,
|
| 838 |
+
"grad_norm": 1.564491629600525,
|
| 839 |
+
"learning_rate": 0.00017743033010414773,
|
| 840 |
+
"loss": 1.481,
|
| 841 |
+
"step": 4850
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 0.3768940850703792,
|
| 845 |
+
"grad_norm": 2.652865409851074,
|
| 846 |
+
"learning_rate": 0.00017717061008233124,
|
| 847 |
+
"loss": 1.5012,
|
| 848 |
+
"step": 4900
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 0.38073994308130144,
|
| 852 |
+
"grad_norm": 1.3611321449279785,
|
| 853 |
+
"learning_rate": 0.00017691089006051478,
|
| 854 |
+
"loss": 1.4732,
|
| 855 |
+
"step": 4950
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"epoch": 0.3845858010922237,
|
| 859 |
+
"grad_norm": 1.3816261291503906,
|
| 860 |
+
"learning_rate": 0.0001766511700386983,
|
| 861 |
+
"loss": 1.4888,
|
| 862 |
+
"step": 5000
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"epoch": 0.3845858010922237,
|
| 866 |
+
"eval_loss": 1.4847265481948853,
|
| 867 |
+
"eval_runtime": 16.9023,
|
| 868 |
+
"eval_samples_per_second": 59.163,
|
| 869 |
+
"eval_steps_per_second": 14.791,
|
| 870 |
+
"step": 5000
|
| 871 |
}
|
| 872 |
],
|
| 873 |
"logging_steps": 50,
|