Training in progress, step 26500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f619cbe8165ed1e2332f6436e4fdf6c896db5237d638a5179f03f772e54deae1
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e41f1a18d6f9531a4bd645cd462d9090e6f54162f6fddf017afc585f3099347
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f75a42181e10c429bdf4cde150fa8bd273b194edd1af255a2c46af2a16e4d777
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b56501523df118c1a33e60d970ee258e92691efddadd68cb368e352ca4fb0c1
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bfa502239c4234c76e8429085577f9d92a7a11c10a2a19927df506f4396be9a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde03cf528fa40100e793e6bc03d3e643c03bab96500bb8c5705c4e1018be13d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 24500,
|
| 3 |
"best_metric": 1.445096731185913,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-24500",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4480,6 +4480,92 @@
|
|
| 4480 |
"eval_samples_per_second": 55.373,
|
| 4481 |
"eval_steps_per_second": 13.843,
|
| 4482 |
"step": 26000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4483 |
}
|
| 4484 |
],
|
| 4485 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 24500,
|
| 3 |
"best_metric": 1.445096731185913,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-24500",
|
| 5 |
+
"epoch": 2.0383047457887855,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 26500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4480 |
"eval_samples_per_second": 55.373,
|
| 4481 |
"eval_steps_per_second": 13.843,
|
| 4482 |
"step": 26000
|
| 4483 |
+
},
|
| 4484 |
+
{
|
| 4485 |
+
"epoch": 2.003692023690485,
|
| 4486 |
+
"grad_norm": 1.621793508529663,
|
| 4487 |
+
"learning_rate": 6.73557904578864e-05,
|
| 4488 |
+
"loss": 1.369,
|
| 4489 |
+
"step": 26050
|
| 4490 |
+
},
|
| 4491 |
+
{
|
| 4492 |
+
"epoch": 2.0075378817014076,
|
| 4493 |
+
"grad_norm": 2.223520278930664,
|
| 4494 |
+
"learning_rate": 6.709607043606992e-05,
|
| 4495 |
+
"loss": 1.4269,
|
| 4496 |
+
"step": 26100
|
| 4497 |
+
},
|
| 4498 |
+
{
|
| 4499 |
+
"epoch": 2.01138373971233,
|
| 4500 |
+
"grad_norm": 1.4860827922821045,
|
| 4501 |
+
"learning_rate": 6.683635041425344e-05,
|
| 4502 |
+
"loss": 1.3634,
|
| 4503 |
+
"step": 26150
|
| 4504 |
+
},
|
| 4505 |
+
{
|
| 4506 |
+
"epoch": 2.015229597723252,
|
| 4507 |
+
"grad_norm": 2.0796148777008057,
|
| 4508 |
+
"learning_rate": 6.657663039243696e-05,
|
| 4509 |
+
"loss": 1.4233,
|
| 4510 |
+
"step": 26200
|
| 4511 |
+
},
|
| 4512 |
+
{
|
| 4513 |
+
"epoch": 2.0190754557341744,
|
| 4514 |
+
"grad_norm": 1.6398444175720215,
|
| 4515 |
+
"learning_rate": 6.631691037062047e-05,
|
| 4516 |
+
"loss": 1.4058,
|
| 4517 |
+
"step": 26250
|
| 4518 |
+
},
|
| 4519 |
+
{
|
| 4520 |
+
"epoch": 2.0190754557341744,
|
| 4521 |
+
"eval_loss": 1.4733901023864746,
|
| 4522 |
+
"eval_runtime": 18.0349,
|
| 4523 |
+
"eval_samples_per_second": 55.448,
|
| 4524 |
+
"eval_steps_per_second": 13.862,
|
| 4525 |
+
"step": 26250
|
| 4526 |
+
},
|
| 4527 |
+
{
|
| 4528 |
+
"epoch": 2.0229213137450963,
|
| 4529 |
+
"grad_norm": 1.7550077438354492,
|
| 4530 |
+
"learning_rate": 6.605719034880399e-05,
|
| 4531 |
+
"loss": 1.4436,
|
| 4532 |
+
"step": 26300
|
| 4533 |
+
},
|
| 4534 |
+
{
|
| 4535 |
+
"epoch": 2.0267671717560187,
|
| 4536 |
+
"grad_norm": 2.3273561000823975,
|
| 4537 |
+
"learning_rate": 6.579747032698751e-05,
|
| 4538 |
+
"loss": 1.356,
|
| 4539 |
+
"step": 26350
|
| 4540 |
+
},
|
| 4541 |
+
{
|
| 4542 |
+
"epoch": 2.030613029766941,
|
| 4543 |
+
"grad_norm": 1.1432509422302246,
|
| 4544 |
+
"learning_rate": 6.553775030517103e-05,
|
| 4545 |
+
"loss": 1.4116,
|
| 4546 |
+
"step": 26400
|
| 4547 |
+
},
|
| 4548 |
+
{
|
| 4549 |
+
"epoch": 2.034458887777863,
|
| 4550 |
+
"grad_norm": 1.2345376014709473,
|
| 4551 |
+
"learning_rate": 6.527803028335455e-05,
|
| 4552 |
+
"loss": 1.4465,
|
| 4553 |
+
"step": 26450
|
| 4554 |
+
},
|
| 4555 |
+
{
|
| 4556 |
+
"epoch": 2.0383047457887855,
|
| 4557 |
+
"grad_norm": 1.485564112663269,
|
| 4558 |
+
"learning_rate": 6.501831026153807e-05,
|
| 4559 |
+
"loss": 1.3896,
|
| 4560 |
+
"step": 26500
|
| 4561 |
+
},
|
| 4562 |
+
{
|
| 4563 |
+
"epoch": 2.0383047457887855,
|
| 4564 |
+
"eval_loss": 1.4492217302322388,
|
| 4565 |
+
"eval_runtime": 17.9114,
|
| 4566 |
+
"eval_samples_per_second": 55.83,
|
| 4567 |
+
"eval_steps_per_second": 13.958,
|
| 4568 |
+
"step": 26500
|
| 4569 |
}
|
| 4570 |
],
|
| 4571 |
"logging_steps": 50,
|