Training in progress, step 14000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13e9082dd993ab0c4dc4999c1db1ee4781bb37e4ef2b3309b62916fe0af14e9e
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b05a5a771bbfa42b5ce6876a4e742b487145ad1b23810ea34ba9924b54cc834
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c567148d53eafd6022545201901938ff6ac986ce6ba91de6582e61fe1a67fdf3
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5de168b313b50f7920582863eb6c48735221da70f052aa0c3517b7e8965981bd
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6148534a27a25fb6f834b4dad22172177ef760e29ec4f90db326b0fc73929937
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d4b25db822a02d6858ac9bb141ed0e837701c9de7d32c7960967feccd1d18fc
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-13000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2330,6 +2330,92 @@
|
|
| 2330 |
"eval_samples_per_second": 56.144,
|
| 2331 |
"eval_steps_per_second": 14.036,
|
| 2332 |
"step": 13500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2333 |
}
|
| 2334 |
],
|
| 2335 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 13750,
|
| 3 |
+
"best_metric": 1.5073590278625488,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-13000",
|
| 5 |
+
"epoch": 1.0768402430582262,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 14000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2330 |
"eval_samples_per_second": 56.144,
|
| 2331 |
"eval_steps_per_second": 14.036,
|
| 2332 |
"step": 13500
|
| 2333 |
+
},
|
| 2334 |
+
{
|
| 2335 |
+
"epoch": 1.042227520959926,
|
| 2336 |
+
"grad_norm": 2.056574583053589,
|
| 2337 |
+
"learning_rate": 0.00013226501831026155,
|
| 2338 |
+
"loss": 1.5387,
|
| 2339 |
+
"step": 13550
|
| 2340 |
+
},
|
| 2341 |
+
{
|
| 2342 |
+
"epoch": 1.0460733789708483,
|
| 2343 |
+
"grad_norm": 1.41805899143219,
|
| 2344 |
+
"learning_rate": 0.00013200529828844506,
|
| 2345 |
+
"loss": 1.4194,
|
| 2346 |
+
"step": 13600
|
| 2347 |
+
},
|
| 2348 |
+
{
|
| 2349 |
+
"epoch": 1.0499192369817707,
|
| 2350 |
+
"grad_norm": 1.5727626085281372,
|
| 2351 |
+
"learning_rate": 0.0001317455782666286,
|
| 2352 |
+
"loss": 1.4763,
|
| 2353 |
+
"step": 13650
|
| 2354 |
+
},
|
| 2355 |
+
{
|
| 2356 |
+
"epoch": 1.0537650949926929,
|
| 2357 |
+
"grad_norm": 1.8175796270370483,
|
| 2358 |
+
"learning_rate": 0.0001314858582448121,
|
| 2359 |
+
"loss": 1.5232,
|
| 2360 |
+
"step": 13700
|
| 2361 |
+
},
|
| 2362 |
+
{
|
| 2363 |
+
"epoch": 1.057610953003615,
|
| 2364 |
+
"grad_norm": 1.459721565246582,
|
| 2365 |
+
"learning_rate": 0.0001312261382229956,
|
| 2366 |
+
"loss": 1.4926,
|
| 2367 |
+
"step": 13750
|
| 2368 |
+
},
|
| 2369 |
+
{
|
| 2370 |
+
"epoch": 1.057610953003615,
|
| 2371 |
+
"eval_loss": 1.5073590278625488,
|
| 2372 |
+
"eval_runtime": 17.8208,
|
| 2373 |
+
"eval_samples_per_second": 56.114,
|
| 2374 |
+
"eval_steps_per_second": 14.029,
|
| 2375 |
+
"step": 13750
|
| 2376 |
+
},
|
| 2377 |
+
{
|
| 2378 |
+
"epoch": 1.0614568110145373,
|
| 2379 |
+
"grad_norm": 1.7236889600753784,
|
| 2380 |
+
"learning_rate": 0.00013096641820117914,
|
| 2381 |
+
"loss": 1.4485,
|
| 2382 |
+
"step": 13800
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"epoch": 1.0653026690254597,
|
| 2386 |
+
"grad_norm": 1.1652172803878784,
|
| 2387 |
+
"learning_rate": 0.00013070669817936265,
|
| 2388 |
+
"loss": 1.4706,
|
| 2389 |
+
"step": 13850
|
| 2390 |
+
},
|
| 2391 |
+
{
|
| 2392 |
+
"epoch": 1.0691485270363819,
|
| 2393 |
+
"grad_norm": 1.1279985904693604,
|
| 2394 |
+
"learning_rate": 0.00013044697815754616,
|
| 2395 |
+
"loss": 1.5507,
|
| 2396 |
+
"step": 13900
|
| 2397 |
+
},
|
| 2398 |
+
{
|
| 2399 |
+
"epoch": 1.072994385047304,
|
| 2400 |
+
"grad_norm": 2.2368061542510986,
|
| 2401 |
+
"learning_rate": 0.0001301872581357297,
|
| 2402 |
+
"loss": 1.5184,
|
| 2403 |
+
"step": 13950
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"epoch": 1.0768402430582262,
|
| 2407 |
+
"grad_norm": 1.1515541076660156,
|
| 2408 |
+
"learning_rate": 0.00012992753811391322,
|
| 2409 |
+
"loss": 1.5184,
|
| 2410 |
+
"step": 14000
|
| 2411 |
+
},
|
| 2412 |
+
{
|
| 2413 |
+
"epoch": 1.0768402430582262,
|
| 2414 |
+
"eval_loss": 1.5123000144958496,
|
| 2415 |
+
"eval_runtime": 17.8325,
|
| 2416 |
+
"eval_samples_per_second": 56.077,
|
| 2417 |
+
"eval_steps_per_second": 14.019,
|
| 2418 |
+
"step": 14000
|
| 2419 |
}
|
| 2420 |
],
|
| 2421 |
"logging_steps": 50,
|