Training in progress, step 36000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16887918a2514cde38397627644e79ad7a7cc859123d9755cbcacbf31a8c9f7d
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc4a7c0e93c6189640f847962e46079ebcf94653f638d4fdd14079010baf0e65
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:266a495e1a502f1d39b57b5fcb15d594e7ebb88b9a37c10fcde88037b918f14e
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c600e0bd51e53276481f2f5ca4e889540a7504baebe1c05ac1ae4c81b5b17f3
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49ec2d3f3b18676d40b30b5547947dd759a32e3455d479b941fbf8c1dd3301d4
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:906c43e30b9edc8f130b3f77317e01f206c6e5f1267c22ca4899b9c5968ba8ca
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 35500,
|
| 3 |
"best_metric": -30.469755172729492,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-35500",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6114,6 +6114,92 @@
|
|
| 6114 |
"eval_samples_per_second": 59.371,
|
| 6115 |
"eval_steps_per_second": 14.843,
|
| 6116 |
"step": 35500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6117 |
}
|
| 6118 |
],
|
| 6119 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 35500,
|
| 3 |
"best_metric": -30.469755172729492,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-35500",
|
| 5 |
+
"epoch": 2.7690177678640104,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 36000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6114 |
"eval_samples_per_second": 59.371,
|
| 6115 |
"eval_steps_per_second": 14.843,
|
| 6116 |
"step": 35500
|
| 6117 |
+
},
|
| 6118 |
+
{
|
| 6119 |
+
"epoch": 2.73440504576571,
|
| 6120 |
+
"grad_norm": 1.0075780153274536,
|
| 6121 |
+
"learning_rate": 1.801937511362751e-05,
|
| 6122 |
+
"loss": -30.4655,
|
| 6123 |
+
"step": 35550
|
| 6124 |
+
},
|
| 6125 |
+
{
|
| 6126 |
+
"epoch": 2.7382509037766325,
|
| 6127 |
+
"grad_norm": 1.856740117073059,
|
| 6128 |
+
"learning_rate": 1.775965509181103e-05,
|
| 6129 |
+
"loss": -30.4653,
|
| 6130 |
+
"step": 35600
|
| 6131 |
+
},
|
| 6132 |
+
{
|
| 6133 |
+
"epoch": 2.742096761787555,
|
| 6134 |
+
"grad_norm": 0.6788634657859802,
|
| 6135 |
+
"learning_rate": 1.7499935069994548e-05,
|
| 6136 |
+
"loss": -30.4655,
|
| 6137 |
+
"step": 35650
|
| 6138 |
+
},
|
| 6139 |
+
{
|
| 6140 |
+
"epoch": 2.745942619798477,
|
| 6141 |
+
"grad_norm": 0.9009542465209961,
|
| 6142 |
+
"learning_rate": 1.7240215048178064e-05,
|
| 6143 |
+
"loss": -30.4656,
|
| 6144 |
+
"step": 35700
|
| 6145 |
+
},
|
| 6146 |
+
{
|
| 6147 |
+
"epoch": 2.7497884778093993,
|
| 6148 |
+
"grad_norm": 1.9731862545013428,
|
| 6149 |
+
"learning_rate": 1.6980495026361584e-05,
|
| 6150 |
+
"loss": -30.4643,
|
| 6151 |
+
"step": 35750
|
| 6152 |
+
},
|
| 6153 |
+
{
|
| 6154 |
+
"epoch": 2.7497884778093993,
|
| 6155 |
+
"eval_loss": -30.46780014038086,
|
| 6156 |
+
"eval_runtime": 16.8878,
|
| 6157 |
+
"eval_samples_per_second": 59.214,
|
| 6158 |
+
"eval_steps_per_second": 14.804,
|
| 6159 |
+
"step": 35750
|
| 6160 |
+
},
|
| 6161 |
+
{
|
| 6162 |
+
"epoch": 2.7536343358203217,
|
| 6163 |
+
"grad_norm": 1.0164039134979248,
|
| 6164 |
+
"learning_rate": 1.67207750045451e-05,
|
| 6165 |
+
"loss": -30.4655,
|
| 6166 |
+
"step": 35800
|
| 6167 |
+
},
|
| 6168 |
+
{
|
| 6169 |
+
"epoch": 2.7574801938312437,
|
| 6170 |
+
"grad_norm": 1.1127376556396484,
|
| 6171 |
+
"learning_rate": 1.646105498272862e-05,
|
| 6172 |
+
"loss": -30.4657,
|
| 6173 |
+
"step": 35850
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 2.761326051842166,
|
| 6177 |
+
"grad_norm": 1.2492812871932983,
|
| 6178 |
+
"learning_rate": 1.6201334960912136e-05,
|
| 6179 |
+
"loss": -30.4659,
|
| 6180 |
+
"step": 35900
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 2.7651719098530885,
|
| 6184 |
+
"grad_norm": 0.944760799407959,
|
| 6185 |
+
"learning_rate": 1.5941614939095655e-05,
|
| 6186 |
+
"loss": -30.4654,
|
| 6187 |
+
"step": 35950
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 2.7690177678640104,
|
| 6191 |
+
"grad_norm": 0.7091095447540283,
|
| 6192 |
+
"learning_rate": 1.5681894917279175e-05,
|
| 6193 |
+
"loss": -30.4654,
|
| 6194 |
+
"step": 36000
|
| 6195 |
+
},
|
| 6196 |
+
{
|
| 6197 |
+
"epoch": 2.7690177678640104,
|
| 6198 |
+
"eval_loss": -30.46934700012207,
|
| 6199 |
+
"eval_runtime": 16.7482,
|
| 6200 |
+
"eval_samples_per_second": 59.708,
|
| 6201 |
+
"eval_steps_per_second": 14.927,
|
| 6202 |
+
"step": 36000
|
| 6203 |
}
|
| 6204 |
],
|
| 6205 |
"logging_steps": 50,
|