Training in progress, step 17500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fd64b4fff9be5f0c3e2f6b4d9919b9e6375df4b9f578e7ba32211a7c5285de9
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d27c5a577064cfc46e35c17df10152b71102230382c1124b730922de85a7cde
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c6f63d896b1e09539b72ac15eabafd681bf824376794bc90d0981fb00940917
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a3d47a33fe410d7c39aed7512ee64bb9c07ee05e603db567ade52fed1bf5077
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fefc995528f5d0eb5c6c7d885b8cddef853e490bf4df60e860a20196ce82f7a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 16750,
|
| 3 |
"best_metric": 1.4708431959152222,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2932,6 +2932,92 @@
|
|
| 2932 |
"eval_samples_per_second": 55.488,
|
| 2933 |
"eval_steps_per_second": 13.872,
|
| 2934 |
"step": 17000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2935 |
}
|
| 2936 |
],
|
| 2937 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 16750,
|
| 3 |
"best_metric": 1.4708431959152222,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
|
| 5 |
+
"epoch": 1.346050303822783,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 17500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2932 |
"eval_samples_per_second": 55.488,
|
| 2933 |
"eval_steps_per_second": 13.872,
|
| 2934 |
"step": 17000
|
| 2935 |
+
},
|
| 2936 |
+
{
|
| 2937 |
+
"epoch": 1.3114375817244828,
|
| 2938 |
+
"grad_norm": 1.732088327407837,
|
| 2939 |
+
"learning_rate": 5.281717583912356e-05,
|
| 2940 |
+
"loss": 1.5014,
|
| 2941 |
+
"step": 17050
|
| 2942 |
+
},
|
| 2943 |
+
{
|
| 2944 |
+
"epoch": 1.315283439735405,
|
| 2945 |
+
"grad_norm": 2.144697427749634,
|
| 2946 |
+
"learning_rate": 5.2559832359104435e-05,
|
| 2947 |
+
"loss": 1.4436,
|
| 2948 |
+
"step": 17100
|
| 2949 |
+
},
|
| 2950 |
+
{
|
| 2951 |
+
"epoch": 1.3191292977463271,
|
| 2952 |
+
"grad_norm": 1.649965763092041,
|
| 2953 |
+
"learning_rate": 5.230248887908532e-05,
|
| 2954 |
+
"loss": 1.4334,
|
| 2955 |
+
"step": 17150
|
| 2956 |
+
},
|
| 2957 |
+
{
|
| 2958 |
+
"epoch": 1.3229751557572493,
|
| 2959 |
+
"grad_norm": 0.8667518496513367,
|
| 2960 |
+
"learning_rate": 5.204514539906621e-05,
|
| 2961 |
+
"loss": 1.487,
|
| 2962 |
+
"step": 17200
|
| 2963 |
+
},
|
| 2964 |
+
{
|
| 2965 |
+
"epoch": 1.3268210137681717,
|
| 2966 |
+
"grad_norm": 1.4567649364471436,
|
| 2967 |
+
"learning_rate": 5.178780191904709e-05,
|
| 2968 |
+
"loss": 1.4714,
|
| 2969 |
+
"step": 17250
|
| 2970 |
+
},
|
| 2971 |
+
{
|
| 2972 |
+
"epoch": 1.3268210137681717,
|
| 2973 |
+
"eval_loss": 1.479749321937561,
|
| 2974 |
+
"eval_runtime": 17.9466,
|
| 2975 |
+
"eval_samples_per_second": 55.721,
|
| 2976 |
+
"eval_steps_per_second": 13.93,
|
| 2977 |
+
"step": 17250
|
| 2978 |
+
},
|
| 2979 |
+
{
|
| 2980 |
+
"epoch": 1.330666871779094,
|
| 2981 |
+
"grad_norm": 1.8523489236831665,
|
| 2982 |
+
"learning_rate": 5.1530458439027974e-05,
|
| 2983 |
+
"loss": 1.4718,
|
| 2984 |
+
"step": 17300
|
| 2985 |
+
},
|
| 2986 |
+
{
|
| 2987 |
+
"epoch": 1.3345127297900161,
|
| 2988 |
+
"grad_norm": 1.091204047203064,
|
| 2989 |
+
"learning_rate": 5.127311495900886e-05,
|
| 2990 |
+
"loss": 1.4012,
|
| 2991 |
+
"step": 17350
|
| 2992 |
+
},
|
| 2993 |
+
{
|
| 2994 |
+
"epoch": 1.3383585878009385,
|
| 2995 |
+
"grad_norm": 1.8271427154541016,
|
| 2996 |
+
"learning_rate": 5.101577147898973e-05,
|
| 2997 |
+
"loss": 1.4547,
|
| 2998 |
+
"step": 17400
|
| 2999 |
+
},
|
| 3000 |
+
{
|
| 3001 |
+
"epoch": 1.3422044458118605,
|
| 3002 |
+
"grad_norm": 1.8682465553283691,
|
| 3003 |
+
"learning_rate": 5.075842799897062e-05,
|
| 3004 |
+
"loss": 1.4373,
|
| 3005 |
+
"step": 17450
|
| 3006 |
+
},
|
| 3007 |
+
{
|
| 3008 |
+
"epoch": 1.346050303822783,
|
| 3009 |
+
"grad_norm": 2.1932857036590576,
|
| 3010 |
+
"learning_rate": 5.0501084518951506e-05,
|
| 3011 |
+
"loss": 1.4628,
|
| 3012 |
+
"step": 17500
|
| 3013 |
+
},
|
| 3014 |
+
{
|
| 3015 |
+
"epoch": 1.346050303822783,
|
| 3016 |
+
"eval_loss": 1.4871113300323486,
|
| 3017 |
+
"eval_runtime": 17.9165,
|
| 3018 |
+
"eval_samples_per_second": 55.814,
|
| 3019 |
+
"eval_steps_per_second": 13.954,
|
| 3020 |
+
"step": 17500
|
| 3021 |
}
|
| 3022 |
],
|
| 3023 |
"logging_steps": 50,
|