Training in progress, step 37000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9cae79982037b0452f88044e66e5767215389fccb1f927236fe4f45e26d9504
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54bc70c1b2659e8cf4a531b6927e07e28f599374e8be262691d66ada5fbe0c3e
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4200f071699c851f6d878a635058a9544f3a748301e98f6330acbcbe7627da2
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6364eda1ec8fc4c6324fd5a2a0079028d1479286c7c9330101ded35fc9bcedd3
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0470b2f59ff591bc600871f7546ce2622ab2681c7ababfb55c537235153f145
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bc995789ac7ace85eec5527f15f9a82c9f1388944ba2d5baa678f54ce3d8943
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": -30.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-36500",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6286,6 +6286,92 @@
|
|
| 6286 |
"eval_samples_per_second": 59.248,
|
| 6287 |
"eval_steps_per_second": 14.812,
|
| 6288 |
"step": 36500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6289 |
}
|
| 6290 |
],
|
| 6291 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 36750,
|
| 3 |
+
"best_metric": -30.470460891723633,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-36500",
|
| 5 |
+
"epoch": 2.845934928082455,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 37000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6286 |
"eval_samples_per_second": 59.248,
|
| 6287 |
"eval_steps_per_second": 14.812,
|
| 6288 |
"step": 36500
|
| 6289 |
+
},
|
| 6290 |
+
{
|
| 6291 |
+
"epoch": 2.811322205984155,
|
| 6292 |
+
"grad_norm": 0.8461622595787048,
|
| 6293 |
+
"learning_rate": 1.2824974677297874e-05,
|
| 6294 |
+
"loss": -30.466,
|
| 6295 |
+
"step": 36550
|
| 6296 |
+
},
|
| 6297 |
+
{
|
| 6298 |
+
"epoch": 2.8151680639950776,
|
| 6299 |
+
"grad_norm": 1.3280683755874634,
|
| 6300 |
+
"learning_rate": 1.2565254655481392e-05,
|
| 6301 |
+
"loss": -30.466,
|
| 6302 |
+
"step": 36600
|
| 6303 |
+
},
|
| 6304 |
+
{
|
| 6305 |
+
"epoch": 2.8190139220059995,
|
| 6306 |
+
"grad_norm": 1.5778801441192627,
|
| 6307 |
+
"learning_rate": 1.230553463366491e-05,
|
| 6308 |
+
"loss": -30.4659,
|
| 6309 |
+
"step": 36650
|
| 6310 |
+
},
|
| 6311 |
+
{
|
| 6312 |
+
"epoch": 2.8228597800169215,
|
| 6313 |
+
"grad_norm": 1.6054023504257202,
|
| 6314 |
+
"learning_rate": 1.204581461184843e-05,
|
| 6315 |
+
"loss": -30.4661,
|
| 6316 |
+
"step": 36700
|
| 6317 |
+
},
|
| 6318 |
+
{
|
| 6319 |
+
"epoch": 2.826705638027844,
|
| 6320 |
+
"grad_norm": 1.3517789840698242,
|
| 6321 |
+
"learning_rate": 1.1786094590031946e-05,
|
| 6322 |
+
"loss": -30.4661,
|
| 6323 |
+
"step": 36750
|
| 6324 |
+
},
|
| 6325 |
+
{
|
| 6326 |
+
"epoch": 2.826705638027844,
|
| 6327 |
+
"eval_loss": -30.470460891723633,
|
| 6328 |
+
"eval_runtime": 16.9586,
|
| 6329 |
+
"eval_samples_per_second": 58.967,
|
| 6330 |
+
"eval_steps_per_second": 14.742,
|
| 6331 |
+
"step": 36750
|
| 6332 |
+
},
|
| 6333 |
+
{
|
| 6334 |
+
"epoch": 2.8305514960387663,
|
| 6335 |
+
"grad_norm": 1.1399978399276733,
|
| 6336 |
+
"learning_rate": 1.1526374568215465e-05,
|
| 6337 |
+
"loss": -30.4658,
|
| 6338 |
+
"step": 36800
|
| 6339 |
+
},
|
| 6340 |
+
{
|
| 6341 |
+
"epoch": 2.8343973540496883,
|
| 6342 |
+
"grad_norm": 0.8231783509254456,
|
| 6343 |
+
"learning_rate": 1.1266654546398983e-05,
|
| 6344 |
+
"loss": -30.4663,
|
| 6345 |
+
"step": 36850
|
| 6346 |
+
},
|
| 6347 |
+
{
|
| 6348 |
+
"epoch": 2.8382432120606107,
|
| 6349 |
+
"grad_norm": 0.8370407819747925,
|
| 6350 |
+
"learning_rate": 1.1006934524582501e-05,
|
| 6351 |
+
"loss": -30.4662,
|
| 6352 |
+
"step": 36900
|
| 6353 |
+
},
|
| 6354 |
+
{
|
| 6355 |
+
"epoch": 2.842089070071533,
|
| 6356 |
+
"grad_norm": 0.8367822170257568,
|
| 6357 |
+
"learning_rate": 1.0747214502766019e-05,
|
| 6358 |
+
"loss": -30.4662,
|
| 6359 |
+
"step": 36950
|
| 6360 |
+
},
|
| 6361 |
+
{
|
| 6362 |
+
"epoch": 2.845934928082455,
|
| 6363 |
+
"grad_norm": 0.8438307642936707,
|
| 6364 |
+
"learning_rate": 1.0487494480949537e-05,
|
| 6365 |
+
"loss": -30.4661,
|
| 6366 |
+
"step": 37000
|
| 6367 |
+
},
|
| 6368 |
+
{
|
| 6369 |
+
"epoch": 2.845934928082455,
|
| 6370 |
+
"eval_loss": -30.470365524291992,
|
| 6371 |
+
"eval_runtime": 16.8139,
|
| 6372 |
+
"eval_samples_per_second": 59.475,
|
| 6373 |
+
"eval_steps_per_second": 14.869,
|
| 6374 |
+
"step": 37000
|
| 6375 |
}
|
| 6376 |
],
|
| 6377 |
"logging_steps": 50,
|