Training in progress, step 7000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52b997298d45a4fc6cd35ecfe5ccf8a749558ac28f75afb2d0909fe1d125004b
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f95ce72a1c3a7c4d5aab7200d9dec95f462be645783ddec5e51a5155488e47cc
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:215f3ea94412e8b86475daca81b7346b3039dd518294dd8628b764b4fe24e130
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:517f7f125f44f7edc89c0d28a191b1cd6fb9d0b94d17a4ddcd53f5fcaa8aefcd
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7df83cd74eb098f609a6963afc6cf853a656c30456c390df44a7c43c2165d798
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93f284fcd12d6e29e5cbdf1e4a0021a53156461eb5dba25291a9ab08dfb3c1a8
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1126,6 +1126,92 @@
|
|
| 1126 |
"eval_samples_per_second": 59.703,
|
| 1127 |
"eval_steps_per_second": 14.926,
|
| 1128 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1129 |
}
|
| 1130 |
],
|
| 1131 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 7000,
|
| 3 |
+
"best_metric": 1.4396251440048218,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-7000",
|
| 5 |
+
"epoch": 0.5384201215291131,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 7000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1126 |
"eval_samples_per_second": 59.703,
|
| 1127 |
"eval_steps_per_second": 14.926,
|
| 1128 |
"step": 6500
|
| 1129 |
+
},
|
| 1130 |
+
{
|
| 1131 |
+
"epoch": 0.503807399430813,
|
| 1132 |
+
"grad_norm": 1.6324172019958496,
|
| 1133 |
+
"learning_rate": 0.00016859984936238735,
|
| 1134 |
+
"loss": 1.5181,
|
| 1135 |
+
"step": 6550
|
| 1136 |
+
},
|
| 1137 |
+
{
|
| 1138 |
+
"epoch": 0.5076532574417353,
|
| 1139 |
+
"grad_norm": 2.2709999084472656,
|
| 1140 |
+
"learning_rate": 0.00016834012934057086,
|
| 1141 |
+
"loss": 1.4517,
|
| 1142 |
+
"step": 6600
|
| 1143 |
+
},
|
| 1144 |
+
{
|
| 1145 |
+
"epoch": 0.5114991154526575,
|
| 1146 |
+
"grad_norm": 1.0540615320205688,
|
| 1147 |
+
"learning_rate": 0.0001680804093187544,
|
| 1148 |
+
"loss": 1.4737,
|
| 1149 |
+
"step": 6650
|
| 1150 |
+
},
|
| 1151 |
+
{
|
| 1152 |
+
"epoch": 0.5153449734635798,
|
| 1153 |
+
"grad_norm": 1.497239589691162,
|
| 1154 |
+
"learning_rate": 0.00016782068929693792,
|
| 1155 |
+
"loss": 1.4038,
|
| 1156 |
+
"step": 6700
|
| 1157 |
+
},
|
| 1158 |
+
{
|
| 1159 |
+
"epoch": 0.519190831474502,
|
| 1160 |
+
"grad_norm": 1.5478484630584717,
|
| 1161 |
+
"learning_rate": 0.00016756096927512143,
|
| 1162 |
+
"loss": 1.5019,
|
| 1163 |
+
"step": 6750
|
| 1164 |
+
},
|
| 1165 |
+
{
|
| 1166 |
+
"epoch": 0.519190831474502,
|
| 1167 |
+
"eval_loss": 1.446391224861145,
|
| 1168 |
+
"eval_runtime": 16.8309,
|
| 1169 |
+
"eval_samples_per_second": 59.415,
|
| 1170 |
+
"eval_steps_per_second": 14.854,
|
| 1171 |
+
"step": 6750
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"epoch": 0.5230366894854241,
|
| 1175 |
+
"grad_norm": 1.1217519044876099,
|
| 1176 |
+
"learning_rate": 0.00016730124925330494,
|
| 1177 |
+
"loss": 1.3677,
|
| 1178 |
+
"step": 6800
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"epoch": 0.5268825474963464,
|
| 1182 |
+
"grad_norm": 1.8287678956985474,
|
| 1183 |
+
"learning_rate": 0.00016704152923148846,
|
| 1184 |
+
"loss": 1.4758,
|
| 1185 |
+
"step": 6850
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"epoch": 0.5307284055072686,
|
| 1189 |
+
"grad_norm": 2.8644931316375732,
|
| 1190 |
+
"learning_rate": 0.000166781809209672,
|
| 1191 |
+
"loss": 1.4328,
|
| 1192 |
+
"step": 6900
|
| 1193 |
+
},
|
| 1194 |
+
{
|
| 1195 |
+
"epoch": 0.5345742635181909,
|
| 1196 |
+
"grad_norm": 0.8893502354621887,
|
| 1197 |
+
"learning_rate": 0.0001665220891878555,
|
| 1198 |
+
"loss": 1.4648,
|
| 1199 |
+
"step": 6950
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"epoch": 0.5384201215291131,
|
| 1203 |
+
"grad_norm": 1.1741907596588135,
|
| 1204 |
+
"learning_rate": 0.00016626236916603902,
|
| 1205 |
+
"loss": 1.4338,
|
| 1206 |
+
"step": 7000
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"epoch": 0.5384201215291131,
|
| 1210 |
+
"eval_loss": 1.4396251440048218,
|
| 1211 |
+
"eval_runtime": 16.8849,
|
| 1212 |
+
"eval_samples_per_second": 59.225,
|
| 1213 |
+
"eval_steps_per_second": 14.806,
|
| 1214 |
+
"step": 7000
|
| 1215 |
}
|
| 1216 |
],
|
| 1217 |
"logging_steps": 50,
|