Training in progress, step 16500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d2bb37f0bb2536ca33711abe7b3143ef3974ef2ca0c218977672374d7ca8e27
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3916cf9823a85ba87d1391c608e5091a2091feb324e91b9bb2a76ddf86c0982d
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:518b6ef8fc143d463491edf1260763c10b5e11e873cd36abf76a046091e577c9
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d28d5f3a49ada942ec355f276b33d6e4bff345075b872b6e2b651f5666a06f0a
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b340848445b4f5b2d5567ddb4c818db5bf89137348148d19bd0b15ff0d69c43d
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d72187e2b5b8f8cffa32a0fbea90f82e23c165518eb95af21f7e18bd9e8c4a42
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2760,6 +2760,92 @@
|
|
| 2760 |
"eval_samples_per_second": 56.159,
|
| 2761 |
"eval_steps_per_second": 14.04,
|
| 2762 |
"step": 16000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2763 |
}
|
| 2764 |
],
|
| 2765 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 16250,
|
| 3 |
+
"best_metric": 1.4893407821655273,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-15500",
|
| 5 |
+
"epoch": 1.2691331436043383,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 16500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2760 |
"eval_samples_per_second": 56.159,
|
| 2761 |
"eval_steps_per_second": 14.04,
|
| 2762 |
"step": 16000
|
| 2763 |
+
},
|
| 2764 |
+
{
|
| 2765 |
+
"epoch": 1.234520421506038,
|
| 2766 |
+
"grad_norm": 6.292486667633057,
|
| 2767 |
+
"learning_rate": 0.00011927901721943746,
|
| 2768 |
+
"loss": 1.3934,
|
| 2769 |
+
"step": 16050
|
| 2770 |
+
},
|
| 2771 |
+
{
|
| 2772 |
+
"epoch": 1.2383662795169603,
|
| 2773 |
+
"grad_norm": 1.6574532985687256,
|
| 2774 |
+
"learning_rate": 0.00011901929719762097,
|
| 2775 |
+
"loss": 1.4821,
|
| 2776 |
+
"step": 16100
|
| 2777 |
+
},
|
| 2778 |
+
{
|
| 2779 |
+
"epoch": 1.2422121375278825,
|
| 2780 |
+
"grad_norm": 1.8651037216186523,
|
| 2781 |
+
"learning_rate": 0.00011875957717580448,
|
| 2782 |
+
"loss": 1.49,
|
| 2783 |
+
"step": 16150
|
| 2784 |
+
},
|
| 2785 |
+
{
|
| 2786 |
+
"epoch": 1.2460579955388047,
|
| 2787 |
+
"grad_norm": 1.3768175840377808,
|
| 2788 |
+
"learning_rate": 0.00011849985715398801,
|
| 2789 |
+
"loss": 1.4579,
|
| 2790 |
+
"step": 16200
|
| 2791 |
+
},
|
| 2792 |
+
{
|
| 2793 |
+
"epoch": 1.2499038535497269,
|
| 2794 |
+
"grad_norm": 1.1569020748138428,
|
| 2795 |
+
"learning_rate": 0.00011824013713217152,
|
| 2796 |
+
"loss": 1.4029,
|
| 2797 |
+
"step": 16250
|
| 2798 |
+
},
|
| 2799 |
+
{
|
| 2800 |
+
"epoch": 1.2499038535497269,
|
| 2801 |
+
"eval_loss": 1.4893407821655273,
|
| 2802 |
+
"eval_runtime": 17.9552,
|
| 2803 |
+
"eval_samples_per_second": 55.694,
|
| 2804 |
+
"eval_steps_per_second": 13.924,
|
| 2805 |
+
"step": 16250
|
| 2806 |
+
},
|
| 2807 |
+
{
|
| 2808 |
+
"epoch": 1.253749711560649,
|
| 2809 |
+
"grad_norm": 1.8632296323776245,
|
| 2810 |
+
"learning_rate": 0.00011798041711035504,
|
| 2811 |
+
"loss": 1.4592,
|
| 2812 |
+
"step": 16300
|
| 2813 |
+
},
|
| 2814 |
+
{
|
| 2815 |
+
"epoch": 1.2575955695715715,
|
| 2816 |
+
"grad_norm": 1.8080470561981201,
|
| 2817 |
+
"learning_rate": 0.00011772069708853856,
|
| 2818 |
+
"loss": 1.4678,
|
| 2819 |
+
"step": 16350
|
| 2820 |
+
},
|
| 2821 |
+
{
|
| 2822 |
+
"epoch": 1.2614414275824937,
|
| 2823 |
+
"grad_norm": 1.4193981885910034,
|
| 2824 |
+
"learning_rate": 0.00011746097706672208,
|
| 2825 |
+
"loss": 1.5031,
|
| 2826 |
+
"step": 16400
|
| 2827 |
+
},
|
| 2828 |
+
{
|
| 2829 |
+
"epoch": 1.2652872855934159,
|
| 2830 |
+
"grad_norm": 1.5050238370895386,
|
| 2831 |
+
"learning_rate": 0.00011720125704490559,
|
| 2832 |
+
"loss": 1.4107,
|
| 2833 |
+
"step": 16450
|
| 2834 |
+
},
|
| 2835 |
+
{
|
| 2836 |
+
"epoch": 1.2691331436043383,
|
| 2837 |
+
"grad_norm": 1.12454092502594,
|
| 2838 |
+
"learning_rate": 0.00011694153702308911,
|
| 2839 |
+
"loss": 1.4572,
|
| 2840 |
+
"step": 16500
|
| 2841 |
+
},
|
| 2842 |
+
{
|
| 2843 |
+
"epoch": 1.2691331436043383,
|
| 2844 |
+
"eval_loss": 1.4972718954086304,
|
| 2845 |
+
"eval_runtime": 17.8677,
|
| 2846 |
+
"eval_samples_per_second": 55.967,
|
| 2847 |
+
"eval_steps_per_second": 13.992,
|
| 2848 |
+
"step": 16500
|
| 2849 |
}
|
| 2850 |
],
|
| 2851 |
"logging_steps": 50,
|