Training in progress, step 20000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37a2fe2b7cf5d7145d364d49968edb54efe43efd98eba6806289d74f7e529ae8
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7d0dd97707c0f0f3c8a12fdba68ea5ed4110d1f028231a6ea1054c90fb8603f
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59e604c620209e5ccc9d2a6df440d8eb819dc89b0c5554c52e42549c656e3250
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b205d8106465470d82a9f668e113b1a4c937f3fe768b385e78f85eb171e49cc2
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eee3ef0a596a0ef37ef802555af22f2c3a4f5d3b4c13d00ed6df2466a73dd8ed
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83d15c547e182574b42460e3490faaf5cb37f299e2d2acbd532f1ef35e0b3aee
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 18750,
|
| 3 |
"best_metric": 1.4757392406463623,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3362,6 +3362,92 @@
|
|
| 3362 |
"eval_samples_per_second": 55.819,
|
| 3363 |
"eval_steps_per_second": 13.955,
|
| 3364 |
"step": 19500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3365 |
}
|
| 3366 |
],
|
| 3367 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 18750,
|
| 3 |
"best_metric": 1.4757392406463623,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
+
"epoch": 1.5383432043688947,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 20000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3362 |
"eval_samples_per_second": 55.819,
|
| 3363 |
"eval_steps_per_second": 13.955,
|
| 3364 |
"step": 19500
|
| 3365 |
+
},
|
| 3366 |
+
{
|
| 3367 |
+
"epoch": 1.5037304822705946,
|
| 3368 |
+
"grad_norm": 0.9244908690452576,
|
| 3369 |
+
"learning_rate": 0.00010110381009272005,
|
| 3370 |
+
"loss": 1.3834,
|
| 3371 |
+
"step": 19550
|
| 3372 |
+
},
|
| 3373 |
+
{
|
| 3374 |
+
"epoch": 1.5075763402815168,
|
| 3375 |
+
"grad_norm": 1.6488862037658691,
|
| 3376 |
+
"learning_rate": 0.00010084409007090356,
|
| 3377 |
+
"loss": 1.5311,
|
| 3378 |
+
"step": 19600
|
| 3379 |
+
},
|
| 3380 |
+
{
|
| 3381 |
+
"epoch": 1.511422198292439,
|
| 3382 |
+
"grad_norm": 1.9130067825317383,
|
| 3383 |
+
"learning_rate": 0.0001005843700490871,
|
| 3384 |
+
"loss": 1.5212,
|
| 3385 |
+
"step": 19650
|
| 3386 |
+
},
|
| 3387 |
+
{
|
| 3388 |
+
"epoch": 1.5152680563033614,
|
| 3389 |
+
"grad_norm": 1.326277256011963,
|
| 3390 |
+
"learning_rate": 0.0001003246500272706,
|
| 3391 |
+
"loss": 1.4067,
|
| 3392 |
+
"step": 19700
|
| 3393 |
+
},
|
| 3394 |
+
{
|
| 3395 |
+
"epoch": 1.5191139143142836,
|
| 3396 |
+
"grad_norm": 1.7258195877075195,
|
| 3397 |
+
"learning_rate": 0.00010006493000545411,
|
| 3398 |
+
"loss": 1.4844,
|
| 3399 |
+
"step": 19750
|
| 3400 |
+
},
|
| 3401 |
+
{
|
| 3402 |
+
"epoch": 1.5191139143142836,
|
| 3403 |
+
"eval_loss": 1.4816969633102417,
|
| 3404 |
+
"eval_runtime": 17.826,
|
| 3405 |
+
"eval_samples_per_second": 56.098,
|
| 3406 |
+
"eval_steps_per_second": 14.024,
|
| 3407 |
+
"step": 19750
|
| 3408 |
+
},
|
| 3409 |
+
{
|
| 3410 |
+
"epoch": 1.5229597723252057,
|
| 3411 |
+
"grad_norm": 1.8164838552474976,
|
| 3412 |
+
"learning_rate": 9.980520998363765e-05,
|
| 3413 |
+
"loss": 1.4233,
|
| 3414 |
+
"step": 19800
|
| 3415 |
+
},
|
| 3416 |
+
{
|
| 3417 |
+
"epoch": 1.5268056303361282,
|
| 3418 |
+
"grad_norm": 1.5884016752243042,
|
| 3419 |
+
"learning_rate": 9.954548996182115e-05,
|
| 3420 |
+
"loss": 1.4313,
|
| 3421 |
+
"step": 19850
|
| 3422 |
+
},
|
| 3423 |
+
{
|
| 3424 |
+
"epoch": 1.5306514883470501,
|
| 3425 |
+
"grad_norm": 1.5381648540496826,
|
| 3426 |
+
"learning_rate": 9.928576994000468e-05,
|
| 3427 |
+
"loss": 1.4789,
|
| 3428 |
+
"step": 19900
|
| 3429 |
+
},
|
| 3430 |
+
{
|
| 3431 |
+
"epoch": 1.5344973463579725,
|
| 3432 |
+
"grad_norm": 1.6448626518249512,
|
| 3433 |
+
"learning_rate": 9.90260499181882e-05,
|
| 3434 |
+
"loss": 1.3756,
|
| 3435 |
+
"step": 19950
|
| 3436 |
+
},
|
| 3437 |
+
{
|
| 3438 |
+
"epoch": 1.5383432043688947,
|
| 3439 |
+
"grad_norm": 1.6137230396270752,
|
| 3440 |
+
"learning_rate": 9.876632989637172e-05,
|
| 3441 |
+
"loss": 1.4171,
|
| 3442 |
+
"step": 20000
|
| 3443 |
+
},
|
| 3444 |
+
{
|
| 3445 |
+
"epoch": 1.5383432043688947,
|
| 3446 |
+
"eval_loss": 1.4770597219467163,
|
| 3447 |
+
"eval_runtime": 17.9365,
|
| 3448 |
+
"eval_samples_per_second": 55.752,
|
| 3449 |
+
"eval_steps_per_second": 13.938,
|
| 3450 |
+
"step": 20000
|
| 3451 |
}
|
| 3452 |
],
|
| 3453 |
"logging_steps": 50,
|