Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35e8b88b22335eff4aa95a5db7b6615364abd358d14317af2dec16bcdb2efc61
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8a93a0264eb6605e0d626be93ba5f8dee38234bcac32f7e6d5d1cccc2a1057f
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c298ef220b2440e2c8688c2918aba70637e9e1c8a2951767772c4969343135c8
|
| 3 |
+
size 14308
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08eda93be407e2e8125df91c801018bcedfb4cde17a8cf02b9f76837f9ade1c1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a00183886b48331e49cfb9b953bfa8a92696629d3ac41c1b27b8636569368855
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2502,6 +2502,92 @@
|
|
| 2502 |
"eval_samples_per_second": 55.51,
|
| 2503 |
"eval_steps_per_second": 13.877,
|
| 2504 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2505 |
}
|
| 2506 |
],
|
| 2507 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 14750,
|
| 3 |
+
"best_metric": 1.4990500211715698,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
|
| 5 |
+
"epoch": 1.153757403276671,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 15000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2502 |
"eval_samples_per_second": 55.51,
|
| 2503 |
"eval_steps_per_second": 13.877,
|
| 2504 |
"step": 14500
|
| 2505 |
+
},
|
| 2506 |
+
{
|
| 2507 |
+
"epoch": 1.119144681178371,
|
| 2508 |
+
"grad_norm": 1.9716360569000244,
|
| 2509 |
+
"learning_rate": 6.568434984007941e-05,
|
| 2510 |
+
"loss": 1.5349,
|
| 2511 |
+
"step": 14550
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"epoch": 1.1229905391892931,
|
| 2515 |
+
"grad_norm": 0.710033655166626,
|
| 2516 |
+
"learning_rate": 6.542700636006029e-05,
|
| 2517 |
+
"loss": 1.4107,
|
| 2518 |
+
"step": 14600
|
| 2519 |
+
},
|
| 2520 |
+
{
|
| 2521 |
+
"epoch": 1.1268363972002153,
|
| 2522 |
+
"grad_norm": 1.4398375749588013,
|
| 2523 |
+
"learning_rate": 6.516966288004117e-05,
|
| 2524 |
+
"loss": 1.4185,
|
| 2525 |
+
"step": 14650
|
| 2526 |
+
},
|
| 2527 |
+
{
|
| 2528 |
+
"epoch": 1.1306822552111375,
|
| 2529 |
+
"grad_norm": 2.5566532611846924,
|
| 2530 |
+
"learning_rate": 6.491231940002206e-05,
|
| 2531 |
+
"loss": 1.5758,
|
| 2532 |
+
"step": 14700
|
| 2533 |
+
},
|
| 2534 |
+
{
|
| 2535 |
+
"epoch": 1.13452811322206,
|
| 2536 |
+
"grad_norm": 1.2500799894332886,
|
| 2537 |
+
"learning_rate": 6.465497592000294e-05,
|
| 2538 |
+
"loss": 1.4751,
|
| 2539 |
+
"step": 14750
|
| 2540 |
+
},
|
| 2541 |
+
{
|
| 2542 |
+
"epoch": 1.13452811322206,
|
| 2543 |
+
"eval_loss": 1.4990500211715698,
|
| 2544 |
+
"eval_runtime": 17.9979,
|
| 2545 |
+
"eval_samples_per_second": 55.562,
|
| 2546 |
+
"eval_steps_per_second": 13.891,
|
| 2547 |
+
"step": 14750
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"epoch": 1.1383739712329821,
|
| 2551 |
+
"grad_norm": 1.5937495231628418,
|
| 2552 |
+
"learning_rate": 6.439763243998382e-05,
|
| 2553 |
+
"loss": 1.5215,
|
| 2554 |
+
"step": 14800
|
| 2555 |
+
},
|
| 2556 |
+
{
|
| 2557 |
+
"epoch": 1.1422198292439043,
|
| 2558 |
+
"grad_norm": 1.362358570098877,
|
| 2559 |
+
"learning_rate": 6.41402889599647e-05,
|
| 2560 |
+
"loss": 1.5125,
|
| 2561 |
+
"step": 14850
|
| 2562 |
+
},
|
| 2563 |
+
{
|
| 2564 |
+
"epoch": 1.1460656872548265,
|
| 2565 |
+
"grad_norm": 2.1192502975463867,
|
| 2566 |
+
"learning_rate": 6.388294547994558e-05,
|
| 2567 |
+
"loss": 1.4485,
|
| 2568 |
+
"step": 14900
|
| 2569 |
+
},
|
| 2570 |
+
{
|
| 2571 |
+
"epoch": 1.149911545265749,
|
| 2572 |
+
"grad_norm": 1.4089174270629883,
|
| 2573 |
+
"learning_rate": 6.362560199992647e-05,
|
| 2574 |
+
"loss": 1.5331,
|
| 2575 |
+
"step": 14950
|
| 2576 |
+
},
|
| 2577 |
+
{
|
| 2578 |
+
"epoch": 1.153757403276671,
|
| 2579 |
+
"grad_norm": 1.3750373125076294,
|
| 2580 |
+
"learning_rate": 6.336825851990735e-05,
|
| 2581 |
+
"loss": 1.5177,
|
| 2582 |
+
"step": 15000
|
| 2583 |
+
},
|
| 2584 |
+
{
|
| 2585 |
+
"epoch": 1.153757403276671,
|
| 2586 |
+
"eval_loss": 1.5118192434310913,
|
| 2587 |
+
"eval_runtime": 17.9213,
|
| 2588 |
+
"eval_samples_per_second": 55.799,
|
| 2589 |
+
"eval_steps_per_second": 13.95,
|
| 2590 |
+
"step": 15000
|
| 2591 |
}
|
| 2592 |
],
|
| 2593 |
"logging_steps": 50,
|