Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9310d7742dd19e663f6402741ea20db935d3b2e0e01a71d5a768ac7db25bc72
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c1a1611c8e46f685348b5dfe48ae762df7c9c3b98434f8da77fcc5f9f81a751
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f525658a7b4e5ff255750bc03647522ca336d6f40ea836aaa028d9a5bebc0152
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c298ef220b2440e2c8688c2918aba70637e9e1c8a2951767772c4969343135c8
|
| 3 |
+
size 14308
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:453f146988a415e18f9bddb29836fc724515457272291d8aed123845d69e5c29
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73ddc28e63ea8c6b2f70c524e6eba5501abe22b3d62b8aa4437458f65de50c75
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 13750,
|
| 3 |
"best_metric": 1.5073590278625488,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-13000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2502,6 +2502,92 @@
|
|
| 2502 |
"eval_samples_per_second": 56.602,
|
| 2503 |
"eval_steps_per_second": 14.151,
|
| 2504 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2505 |
}
|
| 2506 |
],
|
| 2507 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 13750,
|
| 3 |
"best_metric": 1.5073590278625488,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-13000",
|
| 5 |
+
"epoch": 1.153757403276671,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 15000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2502 |
"eval_samples_per_second": 56.602,
|
| 2503 |
"eval_steps_per_second": 14.151,
|
| 2504 |
"step": 14500
|
| 2505 |
+
},
|
| 2506 |
+
{
|
| 2507 |
+
"epoch": 1.119144681178371,
|
| 2508 |
+
"grad_norm": 1.6422228813171387,
|
| 2509 |
+
"learning_rate": 0.0001270706178739319,
|
| 2510 |
+
"loss": 1.5494,
|
| 2511 |
+
"step": 14550
|
| 2512 |
+
},
|
| 2513 |
+
{
|
| 2514 |
+
"epoch": 1.1229905391892931,
|
| 2515 |
+
"grad_norm": 0.7187716960906982,
|
| 2516 |
+
"learning_rate": 0.00012681089785211542,
|
| 2517 |
+
"loss": 1.4221,
|
| 2518 |
+
"step": 14600
|
| 2519 |
+
},
|
| 2520 |
+
{
|
| 2521 |
+
"epoch": 1.1268363972002153,
|
| 2522 |
+
"grad_norm": 1.2605098485946655,
|
| 2523 |
+
"learning_rate": 0.00012655117783029896,
|
| 2524 |
+
"loss": 1.4299,
|
| 2525 |
+
"step": 14650
|
| 2526 |
+
},
|
| 2527 |
+
{
|
| 2528 |
+
"epoch": 1.1306822552111375,
|
| 2529 |
+
"grad_norm": 2.598015069961548,
|
| 2530 |
+
"learning_rate": 0.00012629145780848245,
|
| 2531 |
+
"loss": 1.5757,
|
| 2532 |
+
"step": 14700
|
| 2533 |
+
},
|
| 2534 |
+
{
|
| 2535 |
+
"epoch": 1.13452811322206,
|
| 2536 |
+
"grad_norm": 1.4004614353179932,
|
| 2537 |
+
"learning_rate": 0.00012603173778666596,
|
| 2538 |
+
"loss": 1.482,
|
| 2539 |
+
"step": 14750
|
| 2540 |
+
},
|
| 2541 |
+
{
|
| 2542 |
+
"epoch": 1.13452811322206,
|
| 2543 |
+
"eval_loss": 1.5089725255966187,
|
| 2544 |
+
"eval_runtime": 17.9036,
|
| 2545 |
+
"eval_samples_per_second": 55.855,
|
| 2546 |
+
"eval_steps_per_second": 13.964,
|
| 2547 |
+
"step": 14750
|
| 2548 |
+
},
|
| 2549 |
+
{
|
| 2550 |
+
"epoch": 1.1383739712329821,
|
| 2551 |
+
"grad_norm": 1.3800735473632812,
|
| 2552 |
+
"learning_rate": 0.0001257720177648495,
|
| 2553 |
+
"loss": 1.5285,
|
| 2554 |
+
"step": 14800
|
| 2555 |
+
},
|
| 2556 |
+
{
|
| 2557 |
+
"epoch": 1.1422198292439043,
|
| 2558 |
+
"grad_norm": 1.3741459846496582,
|
| 2559 |
+
"learning_rate": 0.00012551229774303301,
|
| 2560 |
+
"loss": 1.5242,
|
| 2561 |
+
"step": 14850
|
| 2562 |
+
},
|
| 2563 |
+
{
|
| 2564 |
+
"epoch": 1.1460656872548265,
|
| 2565 |
+
"grad_norm": 2.232680559158325,
|
| 2566 |
+
"learning_rate": 0.00012525257772121653,
|
| 2567 |
+
"loss": 1.4483,
|
| 2568 |
+
"step": 14900
|
| 2569 |
+
},
|
| 2570 |
+
{
|
| 2571 |
+
"epoch": 1.149911545265749,
|
| 2572 |
+
"grad_norm": 1.4408409595489502,
|
| 2573 |
+
"learning_rate": 0.00012499285769940007,
|
| 2574 |
+
"loss": 1.5414,
|
| 2575 |
+
"step": 14950
|
| 2576 |
+
},
|
| 2577 |
+
{
|
| 2578 |
+
"epoch": 1.153757403276671,
|
| 2579 |
+
"grad_norm": 1.5221819877624512,
|
| 2580 |
+
"learning_rate": 0.00012473313767758355,
|
| 2581 |
+
"loss": 1.5246,
|
| 2582 |
+
"step": 15000
|
| 2583 |
+
},
|
| 2584 |
+
{
|
| 2585 |
+
"epoch": 1.153757403276671,
|
| 2586 |
+
"eval_loss": 1.516871452331543,
|
| 2587 |
+
"eval_runtime": 17.9308,
|
| 2588 |
+
"eval_samples_per_second": 55.77,
|
| 2589 |
+
"eval_steps_per_second": 13.943,
|
| 2590 |
+
"step": 15000
|
| 2591 |
}
|
| 2592 |
],
|
| 2593 |
"logging_steps": 50,
|