Training in progress, step 24500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe77af0b92d78af8f090fa50857d0c38d1b1c8c9a61171e03ceab5a8bf778802
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b6fd07dac8559828de7e766ee09820ece8ba98e8864c99beb26f34eb1163183
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2af1c783f76f5348e49f62ee7a890b8e943c0ef68fa2435323d3eee421b050d6
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6abd220bb5c699b08784d9e5bd7e4f3c387ae6cf3a2fc509bcb49366bfaee15
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53322b8d938de07c14e159878b580255485dcd3bd83b66c76e6228ae02ba69d0
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c34139dcaf2b70cc0d5b8ce63586446a3e5ce8fa9ab1d39d72513aeacbe3543
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4136,6 +4136,92 @@
|
|
| 4136 |
"eval_samples_per_second": 55.605,
|
| 4137 |
"eval_steps_per_second": 13.901,
|
| 4138 |
"step": 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4139 |
}
|
| 4140 |
],
|
| 4141 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 24500,
|
| 3 |
+
"best_metric": 1.445096731185913,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-24500",
|
| 5 |
+
"epoch": 1.884470425351896,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 24500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4136 |
"eval_samples_per_second": 55.605,
|
| 4137 |
"eval_steps_per_second": 13.901,
|
| 4138 |
"step": 24000
|
| 4139 |
+
},
|
| 4140 |
+
{
|
| 4141 |
+
"epoch": 1.8498577032535959,
|
| 4142 |
+
"grad_norm": 1.5047483444213867,
|
| 4143 |
+
"learning_rate": 7.773420252967302e-05,
|
| 4144 |
+
"loss": 1.4546,
|
| 4145 |
+
"step": 24050
|
| 4146 |
+
},
|
| 4147 |
+
{
|
| 4148 |
+
"epoch": 1.853703561264518,
|
| 4149 |
+
"grad_norm": 1.0463405847549438,
|
| 4150 |
+
"learning_rate": 7.747448250785654e-05,
|
| 4151 |
+
"loss": 1.5014,
|
| 4152 |
+
"step": 24100
|
| 4153 |
+
},
|
| 4154 |
+
{
|
| 4155 |
+
"epoch": 1.8575494192754403,
|
| 4156 |
+
"grad_norm": 1.8368524312973022,
|
| 4157 |
+
"learning_rate": 7.721476248604004e-05,
|
| 4158 |
+
"loss": 1.4616,
|
| 4159 |
+
"step": 24150
|
| 4160 |
+
},
|
| 4161 |
+
{
|
| 4162 |
+
"epoch": 1.8613952772863627,
|
| 4163 |
+
"grad_norm": 1.4084677696228027,
|
| 4164 |
+
"learning_rate": 7.695504246422357e-05,
|
| 4165 |
+
"loss": 1.4255,
|
| 4166 |
+
"step": 24200
|
| 4167 |
+
},
|
| 4168 |
+
{
|
| 4169 |
+
"epoch": 1.8652411352972849,
|
| 4170 |
+
"grad_norm": 1.2279951572418213,
|
| 4171 |
+
"learning_rate": 7.66953224424071e-05,
|
| 4172 |
+
"loss": 1.4254,
|
| 4173 |
+
"step": 24250
|
| 4174 |
+
},
|
| 4175 |
+
{
|
| 4176 |
+
"epoch": 1.8652411352972849,
|
| 4177 |
+
"eval_loss": 1.460336685180664,
|
| 4178 |
+
"eval_runtime": 17.7185,
|
| 4179 |
+
"eval_samples_per_second": 56.438,
|
| 4180 |
+
"eval_steps_per_second": 14.11,
|
| 4181 |
+
"step": 24250
|
| 4182 |
+
},
|
| 4183 |
+
{
|
| 4184 |
+
"epoch": 1.869086993308207,
|
| 4185 |
+
"grad_norm": 1.9729641675949097,
|
| 4186 |
+
"learning_rate": 7.643560242059061e-05,
|
| 4187 |
+
"loss": 1.4656,
|
| 4188 |
+
"step": 24300
|
| 4189 |
+
},
|
| 4190 |
+
{
|
| 4191 |
+
"epoch": 1.8729328513191295,
|
| 4192 |
+
"grad_norm": 0.9121168255805969,
|
| 4193 |
+
"learning_rate": 7.617588239877412e-05,
|
| 4194 |
+
"loss": 1.3949,
|
| 4195 |
+
"step": 24350
|
| 4196 |
+
},
|
| 4197 |
+
{
|
| 4198 |
+
"epoch": 1.8767787093300514,
|
| 4199 |
+
"grad_norm": 1.8953206539154053,
|
| 4200 |
+
"learning_rate": 7.591616237695765e-05,
|
| 4201 |
+
"loss": 1.4006,
|
| 4202 |
+
"step": 24400
|
| 4203 |
+
},
|
| 4204 |
+
{
|
| 4205 |
+
"epoch": 1.8806245673409738,
|
| 4206 |
+
"grad_norm": 1.5828944444656372,
|
| 4207 |
+
"learning_rate": 7.565644235514116e-05,
|
| 4208 |
+
"loss": 1.5085,
|
| 4209 |
+
"step": 24450
|
| 4210 |
+
},
|
| 4211 |
+
{
|
| 4212 |
+
"epoch": 1.884470425351896,
|
| 4213 |
+
"grad_norm": 2.027841329574585,
|
| 4214 |
+
"learning_rate": 7.539672233332467e-05,
|
| 4215 |
+
"loss": 1.3978,
|
| 4216 |
+
"step": 24500
|
| 4217 |
+
},
|
| 4218 |
+
{
|
| 4219 |
+
"epoch": 1.884470425351896,
|
| 4220 |
+
"eval_loss": 1.445096731185913,
|
| 4221 |
+
"eval_runtime": 18.4795,
|
| 4222 |
+
"eval_samples_per_second": 54.114,
|
| 4223 |
+
"eval_steps_per_second": 13.528,
|
| 4224 |
+
"step": 24500
|
| 4225 |
}
|
| 4226 |
],
|
| 4227 |
"logging_steps": 50,
|