Training in progress, step 37500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3c4fec3fcfea21f991151e26f507a7695fb23bd8b856b2b2f700a67bb497070
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af333093453e4833a3b8e0d94d92ddd95f295c0d87e9e98a54a6ad3c390330e9
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c332c5ce1f57e169c0226452b69be2dc4fb900c8955ca04e773762307c8e5eb4
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddcf88ff022861d1e6c33a55560d1a3aa75a31ecba95d857fc1b29571146d9d8
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7b74025bac6cdf338bbaffce8798d5ebfeba84e2c0590feb5feb5210c2d2221
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f59e689571d05ebe27330dcd7978075e538dfc70e5b33155dcbd08ae7037e11
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.7226839661598206,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-31000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6372,6 +6372,92 @@
|
|
| 6372 |
"eval_samples_per_second": 56.187,
|
| 6373 |
"eval_steps_per_second": 14.047,
|
| 6374 |
"step": 37000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6375 |
}
|
| 6376 |
],
|
| 6377 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.7226839661598206,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-31000",
|
| 5 |
+
"epoch": 2.8843935081916774,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 37500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6372 |
"eval_samples_per_second": 56.187,
|
| 6373 |
"eval_steps_per_second": 14.047,
|
| 6374 |
"step": 37000
|
| 6375 |
+
},
|
| 6376 |
+
{
|
| 6377 |
+
"epoch": 2.8497807860933775,
|
| 6378 |
+
"grad_norm": 1.1233916282653809,
|
| 6379 |
+
"learning_rate": 1.0217385658260397e-05,
|
| 6380 |
+
"loss": 0.7128,
|
| 6381 |
+
"step": 37050
|
| 6382 |
+
},
|
| 6383 |
+
{
|
| 6384 |
+
"epoch": 2.8536266441043,
|
| 6385 |
+
"grad_norm": 0.917649507522583,
|
| 6386 |
+
"learning_rate": 9.957665636443913e-06,
|
| 6387 |
+
"loss": 0.7402,
|
| 6388 |
+
"step": 37100
|
| 6389 |
+
},
|
| 6390 |
+
{
|
| 6391 |
+
"epoch": 2.857472502115222,
|
| 6392 |
+
"grad_norm": 0.8935102820396423,
|
| 6393 |
+
"learning_rate": 9.697945614627432e-06,
|
| 6394 |
+
"loss": 0.731,
|
| 6395 |
+
"step": 37150
|
| 6396 |
+
},
|
| 6397 |
+
{
|
| 6398 |
+
"epoch": 2.8613183601261443,
|
| 6399 |
+
"grad_norm": 0.6891331076622009,
|
| 6400 |
+
"learning_rate": 9.43822559281095e-06,
|
| 6401 |
+
"loss": 0.7331,
|
| 6402 |
+
"step": 37200
|
| 6403 |
+
},
|
| 6404 |
+
{
|
| 6405 |
+
"epoch": 2.8651642181370662,
|
| 6406 |
+
"grad_norm": 0.7505995631217957,
|
| 6407 |
+
"learning_rate": 9.178505570994468e-06,
|
| 6408 |
+
"loss": 0.6744,
|
| 6409 |
+
"step": 37250
|
| 6410 |
+
},
|
| 6411 |
+
{
|
| 6412 |
+
"epoch": 2.8651642181370662,
|
| 6413 |
+
"eval_loss": 0.7693511247634888,
|
| 6414 |
+
"eval_runtime": 17.8693,
|
| 6415 |
+
"eval_samples_per_second": 55.962,
|
| 6416 |
+
"eval_steps_per_second": 13.99,
|
| 6417 |
+
"step": 37250
|
| 6418 |
+
},
|
| 6419 |
+
{
|
| 6420 |
+
"epoch": 2.8690100761479886,
|
| 6421 |
+
"grad_norm": 1.2373569011688232,
|
| 6422 |
+
"learning_rate": 8.918785549177986e-06,
|
| 6423 |
+
"loss": 0.6981,
|
| 6424 |
+
"step": 37300
|
| 6425 |
+
},
|
| 6426 |
+
{
|
| 6427 |
+
"epoch": 2.8728559341589106,
|
| 6428 |
+
"grad_norm": 0.9159016013145447,
|
| 6429 |
+
"learning_rate": 8.659065527361506e-06,
|
| 6430 |
+
"loss": 0.7601,
|
| 6431 |
+
"step": 37350
|
| 6432 |
+
},
|
| 6433 |
+
{
|
| 6434 |
+
"epoch": 2.876701792169833,
|
| 6435 |
+
"grad_norm": 0.3170250952243805,
|
| 6436 |
+
"learning_rate": 8.399345505545022e-06,
|
| 6437 |
+
"loss": 0.7008,
|
| 6438 |
+
"step": 37400
|
| 6439 |
+
},
|
| 6440 |
+
{
|
| 6441 |
+
"epoch": 2.8805476501807554,
|
| 6442 |
+
"grad_norm": 0.7592608332633972,
|
| 6443 |
+
"learning_rate": 8.139625483728542e-06,
|
| 6444 |
+
"loss": 0.6966,
|
| 6445 |
+
"step": 37450
|
| 6446 |
+
},
|
| 6447 |
+
{
|
| 6448 |
+
"epoch": 2.8843935081916774,
|
| 6449 |
+
"grad_norm": 0.7826717495918274,
|
| 6450 |
+
"learning_rate": 7.879905461912058e-06,
|
| 6451 |
+
"loss": 0.7398,
|
| 6452 |
+
"step": 37500
|
| 6453 |
+
},
|
| 6454 |
+
{
|
| 6455 |
+
"epoch": 2.8843935081916774,
|
| 6456 |
+
"eval_loss": 0.7694031596183777,
|
| 6457 |
+
"eval_runtime": 17.9358,
|
| 6458 |
+
"eval_samples_per_second": 55.754,
|
| 6459 |
+
"eval_steps_per_second": 13.939,
|
| 6460 |
+
"step": 37500
|
| 6461 |
}
|
| 6462 |
],
|
| 6463 |
"logging_steps": 50,
|