Training in progress, step 23000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:829898806c2cf797a60646c5ed75fbf0d1a577e9d79be79d379be1431ad4345b
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:314892f8ce2942ea9f92638fd38beb1039385352c76b29835054762eadd01ceb
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:332c8cdea66a1010f828c6ecd205e8e1ab1047e6bc38f9436e154fb2f1bf959d
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f21304ce44e93f8f86da1b431eb1e188b0a7d5ce22c8a8c84f5d679245daaffa
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e49d1b644e105b5cfb9b398b2668e7af4b7561d44b430dc9a36c09d64fe8801
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9322bae8c5f36989bed1e15319c91ee4f9f6797c8ed15e1827437e61c8ea85d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 22250,
|
| 3 |
"best_metric": 1.4596961736679077,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-21000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3878,6 +3878,92 @@
|
|
| 3878 |
"eval_samples_per_second": 56.127,
|
| 3879 |
"eval_steps_per_second": 14.032,
|
| 3880 |
"step": 22500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3881 |
}
|
| 3882 |
],
|
| 3883 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 22250,
|
| 3 |
"best_metric": 1.4596961736679077,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-21000",
|
| 5 |
+
"epoch": 1.7690946850242288,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 23000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3878 |
"eval_samples_per_second": 56.127,
|
| 3879 |
"eval_steps_per_second": 14.032,
|
| 3880 |
"step": 22500
|
| 3881 |
+
},
|
| 3882 |
+
{
|
| 3883 |
+
"epoch": 1.734481962925929,
|
| 3884 |
+
"grad_norm": 0.8531803488731384,
|
| 3885 |
+
"learning_rate": 8.552580318416747e-05,
|
| 3886 |
+
"loss": 1.4298,
|
| 3887 |
+
"step": 22550
|
| 3888 |
+
},
|
| 3889 |
+
{
|
| 3890 |
+
"epoch": 1.738327820936851,
|
| 3891 |
+
"grad_norm": 1.3987632989883423,
|
| 3892 |
+
"learning_rate": 8.526608316235098e-05,
|
| 3893 |
+
"loss": 1.4629,
|
| 3894 |
+
"step": 22600
|
| 3895 |
+
},
|
| 3896 |
+
{
|
| 3897 |
+
"epoch": 1.7421736789477733,
|
| 3898 |
+
"grad_norm": 1.4521870613098145,
|
| 3899 |
+
"learning_rate": 8.500636314053451e-05,
|
| 3900 |
+
"loss": 1.4005,
|
| 3901 |
+
"step": 22650
|
| 3902 |
+
},
|
| 3903 |
+
{
|
| 3904 |
+
"epoch": 1.7460195369586955,
|
| 3905 |
+
"grad_norm": 1.0557054281234741,
|
| 3906 |
+
"learning_rate": 8.474664311871803e-05,
|
| 3907 |
+
"loss": 1.4079,
|
| 3908 |
+
"step": 22700
|
| 3909 |
+
},
|
| 3910 |
+
{
|
| 3911 |
+
"epoch": 1.7498653949696177,
|
| 3912 |
+
"grad_norm": 1.5067927837371826,
|
| 3913 |
+
"learning_rate": 8.448692309690155e-05,
|
| 3914 |
+
"loss": 1.4664,
|
| 3915 |
+
"step": 22750
|
| 3916 |
+
},
|
| 3917 |
+
{
|
| 3918 |
+
"epoch": 1.7498653949696177,
|
| 3919 |
+
"eval_loss": 1.4728831052780151,
|
| 3920 |
+
"eval_runtime": 17.7456,
|
| 3921 |
+
"eval_samples_per_second": 56.352,
|
| 3922 |
+
"eval_steps_per_second": 14.088,
|
| 3923 |
+
"step": 22750
|
| 3924 |
+
},
|
| 3925 |
+
{
|
| 3926 |
+
"epoch": 1.75371125298054,
|
| 3927 |
+
"grad_norm": 1.3237221240997314,
|
| 3928 |
+
"learning_rate": 8.422720307508506e-05,
|
| 3929 |
+
"loss": 1.5266,
|
| 3930 |
+
"step": 22800
|
| 3931 |
+
},
|
| 3932 |
+
{
|
| 3933 |
+
"epoch": 1.757557110991462,
|
| 3934 |
+
"grad_norm": 1.4342700242996216,
|
| 3935 |
+
"learning_rate": 8.396748305326859e-05,
|
| 3936 |
+
"loss": 1.4929,
|
| 3937 |
+
"step": 22850
|
| 3938 |
+
},
|
| 3939 |
+
{
|
| 3940 |
+
"epoch": 1.7614029690023845,
|
| 3941 |
+
"grad_norm": 5.2202534675598145,
|
| 3942 |
+
"learning_rate": 8.37077630314521e-05,
|
| 3943 |
+
"loss": 1.457,
|
| 3944 |
+
"step": 22900
|
| 3945 |
+
},
|
| 3946 |
+
{
|
| 3947 |
+
"epoch": 1.7652488270133067,
|
| 3948 |
+
"grad_norm": 2.7584545612335205,
|
| 3949 |
+
"learning_rate": 8.344804300963561e-05,
|
| 3950 |
+
"loss": 1.4523,
|
| 3951 |
+
"step": 22950
|
| 3952 |
+
},
|
| 3953 |
+
{
|
| 3954 |
+
"epoch": 1.7690946850242288,
|
| 3955 |
+
"grad_norm": 1.8208624124526978,
|
| 3956 |
+
"learning_rate": 8.318832298781914e-05,
|
| 3957 |
+
"loss": 1.4746,
|
| 3958 |
+
"step": 23000
|
| 3959 |
+
},
|
| 3960 |
+
{
|
| 3961 |
+
"epoch": 1.7690946850242288,
|
| 3962 |
+
"eval_loss": 1.467396855354309,
|
| 3963 |
+
"eval_runtime": 17.663,
|
| 3964 |
+
"eval_samples_per_second": 56.616,
|
| 3965 |
+
"eval_steps_per_second": 14.154,
|
| 3966 |
+
"step": 23000
|
| 3967 |
}
|
| 3968 |
],
|
| 3969 |
"logging_steps": 50,
|