Training in progress, step 37500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c27b235ffd7eb3533febc9af0f60470e3dba0b8e45360272535091fdccd177b
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:696e56b33a026fc5d8957bdce7458fb9a6f7ad74969dc7cd1a68cbc7a0b070a7
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31f016b3f65efa39acc365f2cd200e250e466d276146e2b7b6697bb3bf4c7a78
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97f5694df4cc55cfb23211b8e8f8f54247ffb944ba00f7e779e21697183c2f1f
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cc58fd6b30bc41899d1238111aee5cdb3d8eeebbfe25f934de223f53728d54d
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23ce717bb1ff7d19ce8b39673c5e006d14b3fec124190d834c88a63ab05da6d0
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": -30.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6372,6 +6372,92 @@
|
|
| 6372 |
"eval_samples_per_second": 59.475,
|
| 6373 |
"eval_steps_per_second": 14.869,
|
| 6374 |
"step": 37000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6375 |
}
|
| 6376 |
],
|
| 6377 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 37500,
|
| 3 |
+
"best_metric": -30.470531463623047,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-37500",
|
| 5 |
+
"epoch": 2.8843935081916774,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 37500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6372 |
"eval_samples_per_second": 59.475,
|
| 6373 |
"eval_steps_per_second": 14.869,
|
| 6374 |
"step": 37000
|
| 6375 |
+
},
|
| 6376 |
+
{
|
| 6377 |
+
"epoch": 2.8497807860933775,
|
| 6378 |
+
"grad_norm": 0.8433590531349182,
|
| 6379 |
+
"learning_rate": 1.0227774459133055e-05,
|
| 6380 |
+
"loss": -30.4661,
|
| 6381 |
+
"step": 37050
|
| 6382 |
+
},
|
| 6383 |
+
{
|
| 6384 |
+
"epoch": 2.8536266441043,
|
| 6385 |
+
"grad_norm": 0.6508035063743591,
|
| 6386 |
+
"learning_rate": 9.968054437316573e-06,
|
| 6387 |
+
"loss": -30.4663,
|
| 6388 |
+
"step": 37100
|
| 6389 |
+
},
|
| 6390 |
+
{
|
| 6391 |
+
"epoch": 2.857472502115222,
|
| 6392 |
+
"grad_norm": 1.3801597356796265,
|
| 6393 |
+
"learning_rate": 9.70833441550009e-06,
|
| 6394 |
+
"loss": -30.4662,
|
| 6395 |
+
"step": 37150
|
| 6396 |
+
},
|
| 6397 |
+
{
|
| 6398 |
+
"epoch": 2.8613183601261443,
|
| 6399 |
+
"grad_norm": 0.7447367906570435,
|
| 6400 |
+
"learning_rate": 9.448614393683609e-06,
|
| 6401 |
+
"loss": -30.4663,
|
| 6402 |
+
"step": 37200
|
| 6403 |
+
},
|
| 6404 |
+
{
|
| 6405 |
+
"epoch": 2.8651642181370662,
|
| 6406 |
+
"grad_norm": 1.370768666267395,
|
| 6407 |
+
"learning_rate": 9.188894371867128e-06,
|
| 6408 |
+
"loss": -30.4661,
|
| 6409 |
+
"step": 37250
|
| 6410 |
+
},
|
| 6411 |
+
{
|
| 6412 |
+
"epoch": 2.8651642181370662,
|
| 6413 |
+
"eval_loss": -30.470088958740234,
|
| 6414 |
+
"eval_runtime": 16.7498,
|
| 6415 |
+
"eval_samples_per_second": 59.702,
|
| 6416 |
+
"eval_steps_per_second": 14.926,
|
| 6417 |
+
"step": 37250
|
| 6418 |
+
},
|
| 6419 |
+
{
|
| 6420 |
+
"epoch": 2.8690100761479886,
|
| 6421 |
+
"grad_norm": 1.3880547285079956,
|
| 6422 |
+
"learning_rate": 8.929174350050646e-06,
|
| 6423 |
+
"loss": -30.4665,
|
| 6424 |
+
"step": 37300
|
| 6425 |
+
},
|
| 6426 |
+
{
|
| 6427 |
+
"epoch": 2.8728559341589106,
|
| 6428 |
+
"grad_norm": 1.428084135055542,
|
| 6429 |
+
"learning_rate": 8.669454328234164e-06,
|
| 6430 |
+
"loss": -30.4665,
|
| 6431 |
+
"step": 37350
|
| 6432 |
+
},
|
| 6433 |
+
{
|
| 6434 |
+
"epoch": 2.876701792169833,
|
| 6435 |
+
"grad_norm": 0.4759369492530823,
|
| 6436 |
+
"learning_rate": 8.409734306417682e-06,
|
| 6437 |
+
"loss": -30.4665,
|
| 6438 |
+
"step": 37400
|
| 6439 |
+
},
|
| 6440 |
+
{
|
| 6441 |
+
"epoch": 2.8805476501807554,
|
| 6442 |
+
"grad_norm": 1.0323377847671509,
|
| 6443 |
+
"learning_rate": 8.1500142846012e-06,
|
| 6444 |
+
"loss": -30.4666,
|
| 6445 |
+
"step": 37450
|
| 6446 |
+
},
|
| 6447 |
+
{
|
| 6448 |
+
"epoch": 2.8843935081916774,
|
| 6449 |
+
"grad_norm": 0.9753648638725281,
|
| 6450 |
+
"learning_rate": 7.890294262784718e-06,
|
| 6451 |
+
"loss": -30.4666,
|
| 6452 |
+
"step": 37500
|
| 6453 |
+
},
|
| 6454 |
+
{
|
| 6455 |
+
"epoch": 2.8843935081916774,
|
| 6456 |
+
"eval_loss": -30.470531463623047,
|
| 6457 |
+
"eval_runtime": 16.7944,
|
| 6458 |
+
"eval_samples_per_second": 59.544,
|
| 6459 |
+
"eval_steps_per_second": 14.886,
|
| 6460 |
+
"step": 37500
|
| 6461 |
}
|
| 6462 |
],
|
| 6463 |
"logging_steps": 50,
|