Training in progress, step 8000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8cf14ec041a3170fc93bd13962e58c3433da275c26e3ddcd248f44fe9261656
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5afd0a70049fd01ffc5b0b2af6dbbfcaccc76963457c541c72cb41caeec8cf4
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1383466fc8ec131ed5aea99415fe444f7a641621908ee8efda1f95cd39e73d3d
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:565e9a1f20b4e27434c54601d243feb5ec67c27b2d167d2e9e70202e4cc1bb10
|
| 3 |
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4875a6ebf2ebe9f7eec596109836a4abaa1f225d707484c4b3129f5845c484e
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:763a8e91c3a700b449d0501649e8a0112790ca7b7dbb67a7e5e5dcfd39db55d8
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1298,6 +1298,92 @@
|
|
| 1298 |
"eval_samples_per_second": 59.328,
|
| 1299 |
"eval_steps_per_second": 14.832,
|
| 1300 |
"step": 7500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1301 |
}
|
| 1302 |
],
|
| 1303 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 8000,
|
| 3 |
+
"best_metric": 1.410463809967041,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-8000",
|
| 5 |
+
"epoch": 0.6153372817475579,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 8000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1298 |
"eval_samples_per_second": 59.328,
|
| 1299 |
"eval_steps_per_second": 14.832,
|
| 1300 |
"step": 7500
|
| 1301 |
+
},
|
| 1302 |
+
{
|
| 1303 |
+
"epoch": 0.5807245596492577,
|
| 1304 |
+
"grad_norm": 1.6724326610565186,
|
| 1305 |
+
"learning_rate": 0.00016341064332649406,
|
| 1306 |
+
"loss": 1.4254,
|
| 1307 |
+
"step": 7550
|
| 1308 |
+
},
|
| 1309 |
+
{
|
| 1310 |
+
"epoch": 0.58457041766018,
|
| 1311 |
+
"grad_norm": 0.871095597743988,
|
| 1312 |
+
"learning_rate": 0.00016315092330467757,
|
| 1313 |
+
"loss": 1.3679,
|
| 1314 |
+
"step": 7600
|
| 1315 |
+
},
|
| 1316 |
+
{
|
| 1317 |
+
"epoch": 0.5884162756711022,
|
| 1318 |
+
"grad_norm": 1.2308543920516968,
|
| 1319 |
+
"learning_rate": 0.00016289120328286108,
|
| 1320 |
+
"loss": 1.4699,
|
| 1321 |
+
"step": 7650
|
| 1322 |
+
},
|
| 1323 |
+
{
|
| 1324 |
+
"epoch": 0.5922621336820244,
|
| 1325 |
+
"grad_norm": 1.0057512521743774,
|
| 1326 |
+
"learning_rate": 0.0001626314832610446,
|
| 1327 |
+
"loss": 1.3838,
|
| 1328 |
+
"step": 7700
|
| 1329 |
+
},
|
| 1330 |
+
{
|
| 1331 |
+
"epoch": 0.5961079916929467,
|
| 1332 |
+
"grad_norm": 2.5127899646759033,
|
| 1333 |
+
"learning_rate": 0.0001623717632392281,
|
| 1334 |
+
"loss": 1.4696,
|
| 1335 |
+
"step": 7750
|
| 1336 |
+
},
|
| 1337 |
+
{
|
| 1338 |
+
"epoch": 0.5961079916929467,
|
| 1339 |
+
"eval_loss": 1.4130176305770874,
|
| 1340 |
+
"eval_runtime": 16.9059,
|
| 1341 |
+
"eval_samples_per_second": 59.151,
|
| 1342 |
+
"eval_steps_per_second": 14.788,
|
| 1343 |
+
"step": 7750
|
| 1344 |
+
},
|
| 1345 |
+
{
|
| 1346 |
+
"epoch": 0.5999538497038689,
|
| 1347 |
+
"grad_norm": 0.9064082503318787,
|
| 1348 |
+
"learning_rate": 0.00016211204321741165,
|
| 1349 |
+
"loss": 1.4517,
|
| 1350 |
+
"step": 7800
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"epoch": 0.6037997077147912,
|
| 1354 |
+
"grad_norm": 0.8912540674209595,
|
| 1355 |
+
"learning_rate": 0.00016185232319559516,
|
| 1356 |
+
"loss": 1.4393,
|
| 1357 |
+
"step": 7850
|
| 1358 |
+
},
|
| 1359 |
+
{
|
| 1360 |
+
"epoch": 0.6076455657257134,
|
| 1361 |
+
"grad_norm": 1.231679916381836,
|
| 1362 |
+
"learning_rate": 0.00016159260317377867,
|
| 1363 |
+
"loss": 1.4221,
|
| 1364 |
+
"step": 7900
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"epoch": 0.6114914237366357,
|
| 1368 |
+
"grad_norm": 0.9336662292480469,
|
| 1369 |
+
"learning_rate": 0.0001613328831519622,
|
| 1370 |
+
"loss": 1.4739,
|
| 1371 |
+
"step": 7950
|
| 1372 |
+
},
|
| 1373 |
+
{
|
| 1374 |
+
"epoch": 0.6153372817475579,
|
| 1375 |
+
"grad_norm": 1.313412070274353,
|
| 1376 |
+
"learning_rate": 0.0001610731631301457,
|
| 1377 |
+
"loss": 1.4792,
|
| 1378 |
+
"step": 8000
|
| 1379 |
+
},
|
| 1380 |
+
{
|
| 1381 |
+
"epoch": 0.6153372817475579,
|
| 1382 |
+
"eval_loss": 1.410463809967041,
|
| 1383 |
+
"eval_runtime": 16.8522,
|
| 1384 |
+
"eval_samples_per_second": 59.34,
|
| 1385 |
+
"eval_steps_per_second": 14.835,
|
| 1386 |
+
"step": 8000
|
| 1387 |
}
|
| 1388 |
],
|
| 1389 |
"logging_steps": 50,
|