Training in progress, step 36000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74ae96a07f0a1364fae979dc8192670789abe5ddf742fd023b1f7e6cff9f0baf
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14c071741a7326f16a83b0de63af44ed2b3c7961447b59e277e8e3f8fd62b865
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae2cee13b4f8b39a57ec4a0861bd7b8c66161ea17bdf36a9f9ee8d1ce1e2759c
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6591f94e486820dc9de5c16ae67ac55b097fbd6c09cd7885ab8396b22d79615
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f183c8ebab3f56f9652055ab8d2536f1affa446052e58c49d6e1ac9410f236aa
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906c43e30b9edc8f130b3f77317e01f206c6e5f1267c22ca4899b9c5968ba8ca
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 34000,
|
| 3 |
"best_metric": 0.987713634967804,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6114,6 +6114,92 @@
|
|
| 6114 |
"eval_samples_per_second": 58.178,
|
| 6115 |
"eval_steps_per_second": 14.544,
|
| 6116 |
"step": 35500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6117 |
}
|
| 6118 |
],
|
| 6119 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 34000,
|
| 3 |
"best_metric": 0.987713634967804,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
| 5 |
+
"epoch": 2.7690177678640104,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 36000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6114 |
"eval_samples_per_second": 58.178,
|
| 6115 |
"eval_steps_per_second": 14.544,
|
| 6116 |
"step": 35500
|
| 6117 |
+
},
|
| 6118 |
+
{
|
| 6119 |
+
"epoch": 2.73440504576571,
|
| 6120 |
+
"grad_norm": 0.66939777135849,
|
| 6121 |
+
"learning_rate": 1.801937511362751e-05,
|
| 6122 |
+
"loss": 0.8892,
|
| 6123 |
+
"step": 35550
|
| 6124 |
+
},
|
| 6125 |
+
{
|
| 6126 |
+
"epoch": 2.7382509037766325,
|
| 6127 |
+
"grad_norm": 1.0852998495101929,
|
| 6128 |
+
"learning_rate": 1.775965509181103e-05,
|
| 6129 |
+
"loss": 0.9502,
|
| 6130 |
+
"step": 35600
|
| 6131 |
+
},
|
| 6132 |
+
{
|
| 6133 |
+
"epoch": 2.742096761787555,
|
| 6134 |
+
"grad_norm": 0.7603423595428467,
|
| 6135 |
+
"learning_rate": 1.7499935069994548e-05,
|
| 6136 |
+
"loss": 1.0034,
|
| 6137 |
+
"step": 35650
|
| 6138 |
+
},
|
| 6139 |
+
{
|
| 6140 |
+
"epoch": 2.745942619798477,
|
| 6141 |
+
"grad_norm": 0.4625702202320099,
|
| 6142 |
+
"learning_rate": 1.7240215048178064e-05,
|
| 6143 |
+
"loss": 1.0123,
|
| 6144 |
+
"step": 35700
|
| 6145 |
+
},
|
| 6146 |
+
{
|
| 6147 |
+
"epoch": 2.7497884778093993,
|
| 6148 |
+
"grad_norm": 1.4387953281402588,
|
| 6149 |
+
"learning_rate": 1.6980495026361584e-05,
|
| 6150 |
+
"loss": 0.9704,
|
| 6151 |
+
"step": 35750
|
| 6152 |
+
},
|
| 6153 |
+
{
|
| 6154 |
+
"epoch": 2.7497884778093993,
|
| 6155 |
+
"eval_loss": 0.9952225685119629,
|
| 6156 |
+
"eval_runtime": 17.4458,
|
| 6157 |
+
"eval_samples_per_second": 57.32,
|
| 6158 |
+
"eval_steps_per_second": 14.33,
|
| 6159 |
+
"step": 35750
|
| 6160 |
+
},
|
| 6161 |
+
{
|
| 6162 |
+
"epoch": 2.7536343358203217,
|
| 6163 |
+
"grad_norm": 0.6899126172065735,
|
| 6164 |
+
"learning_rate": 1.67207750045451e-05,
|
| 6165 |
+
"loss": 0.9627,
|
| 6166 |
+
"step": 35800
|
| 6167 |
+
},
|
| 6168 |
+
{
|
| 6169 |
+
"epoch": 2.7574801938312437,
|
| 6170 |
+
"grad_norm": 1.0329424142837524,
|
| 6171 |
+
"learning_rate": 1.646105498272862e-05,
|
| 6172 |
+
"loss": 0.9207,
|
| 6173 |
+
"step": 35850
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 2.761326051842166,
|
| 6177 |
+
"grad_norm": 1.1055504083633423,
|
| 6178 |
+
"learning_rate": 1.6201334960912136e-05,
|
| 6179 |
+
"loss": 0.9834,
|
| 6180 |
+
"step": 35900
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 2.7651719098530885,
|
| 6184 |
+
"grad_norm": 0.7458188533782959,
|
| 6185 |
+
"learning_rate": 1.5941614939095655e-05,
|
| 6186 |
+
"loss": 1.003,
|
| 6187 |
+
"step": 35950
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 2.7690177678640104,
|
| 6191 |
+
"grad_norm": 1.112021803855896,
|
| 6192 |
+
"learning_rate": 1.5681894917279175e-05,
|
| 6193 |
+
"loss": 1.0001,
|
| 6194 |
+
"step": 36000
|
| 6195 |
+
},
|
| 6196 |
+
{
|
| 6197 |
+
"epoch": 2.7690177678640104,
|
| 6198 |
+
"eval_loss": 0.9910063147544861,
|
| 6199 |
+
"eval_runtime": 17.2718,
|
| 6200 |
+
"eval_samples_per_second": 57.898,
|
| 6201 |
+
"eval_steps_per_second": 14.474,
|
| 6202 |
+
"step": 36000
|
| 6203 |
}
|
| 6204 |
],
|
| 6205 |
"logging_steps": 50,
|