Training in progress, step 3000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e598bf8b3c9e467b65b1bb82c8cbdd7f6d9c1972d320fbed970e890963dcdc5d
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d2d6f677b30f619e4a135ea8b093f0ab4667e8aba8c81cd7afaa73bd22e2ff8
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dc13a71e65c3e33eb37c20ebdf6182925491cef76344bbf33cf66c0cd5d4873
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cdad770e6415de25f2503b80a04125ce36c191e7ebbe40cc70935c15b3079b5
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fd6bb9f68e7b13e6013a02cf99adaeff381c90e8c38480bfbcc7ea752fc628b
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e2f4f94b8df8c39a3dbd1dfca81bfb52c1274fbec2120e8d28a3f0e8b8a87c3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 4.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -438,6 +438,92 @@
|
|
| 438 |
"eval_samples_per_second": 54.025,
|
| 439 |
"eval_steps_per_second": 13.506,
|
| 440 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
}
|
| 442 |
],
|
| 443 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 3000,
|
| 3 |
+
"best_metric": 4.579595565795898,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-3000",
|
| 5 |
+
"epoch": 0.2307514806553342,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 3000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 438 |
"eval_samples_per_second": 54.025,
|
| 439 |
"eval_steps_per_second": 13.506,
|
| 440 |
"step": 2500
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"epoch": 0.19613875855703408,
|
| 444 |
+
"grad_norm": 2.59525465965271,
|
| 445 |
+
"learning_rate": 0.00018938783990857857,
|
| 446 |
+
"loss": 4.4364,
|
| 447 |
+
"step": 2550
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"epoch": 0.19998461656795632,
|
| 451 |
+
"grad_norm": 3.844686985015869,
|
| 452 |
+
"learning_rate": 0.00018912811988676208,
|
| 453 |
+
"loss": 4.6437,
|
| 454 |
+
"step": 2600
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"epoch": 0.20383047457887854,
|
| 458 |
+
"grad_norm": 3.4633946418762207,
|
| 459 |
+
"learning_rate": 0.0001888683998649456,
|
| 460 |
+
"loss": 4.5016,
|
| 461 |
+
"step": 2650
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"epoch": 0.20767633258980078,
|
| 465 |
+
"grad_norm": 3.7852296829223633,
|
| 466 |
+
"learning_rate": 0.0001886086798431291,
|
| 467 |
+
"loss": 4.5845,
|
| 468 |
+
"step": 2700
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"epoch": 0.21152219060072303,
|
| 472 |
+
"grad_norm": 3.8716065883636475,
|
| 473 |
+
"learning_rate": 0.00018834895982131265,
|
| 474 |
+
"loss": 4.3669,
|
| 475 |
+
"step": 2750
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"epoch": 0.21152219060072303,
|
| 479 |
+
"eval_loss": 4.602295875549316,
|
| 480 |
+
"eval_runtime": 18.4747,
|
| 481 |
+
"eval_samples_per_second": 54.128,
|
| 482 |
+
"eval_steps_per_second": 13.532,
|
| 483 |
+
"step": 2750
|
| 484 |
+
},
|
| 485 |
+
{
|
| 486 |
+
"epoch": 0.21536804861164527,
|
| 487 |
+
"grad_norm": 3.9932167530059814,
|
| 488 |
+
"learning_rate": 0.00018808923979949616,
|
| 489 |
+
"loss": 4.4545,
|
| 490 |
+
"step": 2800
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 0.21921390662256748,
|
| 494 |
+
"grad_norm": 4.182176113128662,
|
| 495 |
+
"learning_rate": 0.00018782951977767967,
|
| 496 |
+
"loss": 4.5355,
|
| 497 |
+
"step": 2850
|
| 498 |
+
},
|
| 499 |
+
{
|
| 500 |
+
"epoch": 0.22305976463348973,
|
| 501 |
+
"grad_norm": 3.776895523071289,
|
| 502 |
+
"learning_rate": 0.00018756979975586318,
|
| 503 |
+
"loss": 4.6859,
|
| 504 |
+
"step": 2900
|
| 505 |
+
},
|
| 506 |
+
{
|
| 507 |
+
"epoch": 0.22690562264441197,
|
| 508 |
+
"grad_norm": 3.9219324588775635,
|
| 509 |
+
"learning_rate": 0.0001873100797340467,
|
| 510 |
+
"loss": 4.5186,
|
| 511 |
+
"step": 2950
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"epoch": 0.2307514806553342,
|
| 515 |
+
"grad_norm": 3.7879323959350586,
|
| 516 |
+
"learning_rate": 0.0001870503597122302,
|
| 517 |
+
"loss": 4.612,
|
| 518 |
+
"step": 3000
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
"epoch": 0.2307514806553342,
|
| 522 |
+
"eval_loss": 4.579595565795898,
|
| 523 |
+
"eval_runtime": 18.476,
|
| 524 |
+
"eval_samples_per_second": 54.124,
|
| 525 |
+
"eval_steps_per_second": 13.531,
|
| 526 |
+
"step": 3000
|
| 527 |
}
|
| 528 |
],
|
| 529 |
"logging_steps": 50,
|