Training in progress, step 25500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a05a340cb4a65f48636e96ed4fd91629d922458e974787fca1ba7f6545c46cb6
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe554b76c07e1e707d896c2ee3a20371f26db4350a7a78776a0cbae8a2db2c39
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d55ae62412810c5314e1159d09e99db512bb7ac1a37d12bf0208475b2b472ef4
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc69f04c57ac233f68440607df3025c3f527f698c64e56350e0ea45b99be0781
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ce31cedb3c2765a684ec6f8057f362dc008c191e855761ff2fb30ba5f1fb29d
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec634d9549d9c3645447089832de3b53917dd1ce5acd38abf0faed5b1df6f1af
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 24500,
|
| 3 |
"best_metric": 1.445096731185913,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-24500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4308,6 +4308,92 @@
|
|
| 4308 |
"eval_samples_per_second": 54.096,
|
| 4309 |
"eval_steps_per_second": 13.524,
|
| 4310 |
"step": 25000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4311 |
}
|
| 4312 |
],
|
| 4313 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 24500,
|
| 3 |
"best_metric": 1.445096731185913,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-24500",
|
| 5 |
+
"epoch": 1.9613875855703409,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 25500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4308 |
"eval_samples_per_second": 54.096,
|
| 4309 |
"eval_steps_per_second": 13.524,
|
| 4310 |
"step": 25000
|
| 4311 |
+
},
|
| 4312 |
+
{
|
| 4313 |
+
"epoch": 1.9267748634720405,
|
| 4314 |
+
"grad_norm": 1.4893878698349,
|
| 4315 |
+
"learning_rate": 7.25449964937797e-05,
|
| 4316 |
+
"loss": 1.5067,
|
| 4317 |
+
"step": 25050
|
| 4318 |
+
},
|
| 4319 |
+
{
|
| 4320 |
+
"epoch": 1.930620721482963,
|
| 4321 |
+
"grad_norm": 0.8735935091972351,
|
| 4322 |
+
"learning_rate": 7.228527647196322e-05,
|
| 4323 |
+
"loss": 1.4671,
|
| 4324 |
+
"step": 25100
|
| 4325 |
+
},
|
| 4326 |
+
{
|
| 4327 |
+
"epoch": 1.9344665794938851,
|
| 4328 |
+
"grad_norm": 1.6086535453796387,
|
| 4329 |
+
"learning_rate": 7.202555645014675e-05,
|
| 4330 |
+
"loss": 1.4551,
|
| 4331 |
+
"step": 25150
|
| 4332 |
+
},
|
| 4333 |
+
{
|
| 4334 |
+
"epoch": 1.9383124375048073,
|
| 4335 |
+
"grad_norm": 0.683675229549408,
|
| 4336 |
+
"learning_rate": 7.176583642833027e-05,
|
| 4337 |
+
"loss": 1.4673,
|
| 4338 |
+
"step": 25200
|
| 4339 |
+
},
|
| 4340 |
+
{
|
| 4341 |
+
"epoch": 1.9421582955157297,
|
| 4342 |
+
"grad_norm": 1.9318158626556396,
|
| 4343 |
+
"learning_rate": 7.150611640651378e-05,
|
| 4344 |
+
"loss": 1.4199,
|
| 4345 |
+
"step": 25250
|
| 4346 |
+
},
|
| 4347 |
+
{
|
| 4348 |
+
"epoch": 1.9421582955157297,
|
| 4349 |
+
"eval_loss": 1.4574114084243774,
|
| 4350 |
+
"eval_runtime": 18.5222,
|
| 4351 |
+
"eval_samples_per_second": 53.989,
|
| 4352 |
+
"eval_steps_per_second": 13.497,
|
| 4353 |
+
"step": 25250
|
| 4354 |
+
},
|
| 4355 |
+
{
|
| 4356 |
+
"epoch": 1.9460041535266517,
|
| 4357 |
+
"grad_norm": 1.9871971607208252,
|
| 4358 |
+
"learning_rate": 7.12463963846973e-05,
|
| 4359 |
+
"loss": 1.5002,
|
| 4360 |
+
"step": 25300
|
| 4361 |
+
},
|
| 4362 |
+
{
|
| 4363 |
+
"epoch": 1.949850011537574,
|
| 4364 |
+
"grad_norm": 1.4302830696105957,
|
| 4365 |
+
"learning_rate": 7.098667636288082e-05,
|
| 4366 |
+
"loss": 1.46,
|
| 4367 |
+
"step": 25350
|
| 4368 |
+
},
|
| 4369 |
+
{
|
| 4370 |
+
"epoch": 1.9536958695484963,
|
| 4371 |
+
"grad_norm": 1.8389050960540771,
|
| 4372 |
+
"learning_rate": 7.072695634106434e-05,
|
| 4373 |
+
"loss": 1.4025,
|
| 4374 |
+
"step": 25400
|
| 4375 |
+
},
|
| 4376 |
+
{
|
| 4377 |
+
"epoch": 1.9575417275594185,
|
| 4378 |
+
"grad_norm": 1.7089191675186157,
|
| 4379 |
+
"learning_rate": 7.046723631924785e-05,
|
| 4380 |
+
"loss": 1.4507,
|
| 4381 |
+
"step": 25450
|
| 4382 |
+
},
|
| 4383 |
+
{
|
| 4384 |
+
"epoch": 1.9613875855703409,
|
| 4385 |
+
"grad_norm": 1.3698766231536865,
|
| 4386 |
+
"learning_rate": 7.020751629743138e-05,
|
| 4387 |
+
"loss": 1.4954,
|
| 4388 |
+
"step": 25500
|
| 4389 |
+
},
|
| 4390 |
+
{
|
| 4391 |
+
"epoch": 1.9613875855703409,
|
| 4392 |
+
"eval_loss": 1.454710841178894,
|
| 4393 |
+
"eval_runtime": 18.5708,
|
| 4394 |
+
"eval_samples_per_second": 53.848,
|
| 4395 |
+
"eval_steps_per_second": 13.462,
|
| 4396 |
+
"step": 25500
|
| 4397 |
}
|
| 4398 |
],
|
| 4399 |
"logging_steps": 50,
|