Training in progress, step 8000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8feea685a107f8ccd353fa0c6a7247dc35bf0b9ab63a5ddc15a9bb4d1290d199
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66b6610111b8d0f3342d2760bc32d4c23e23f0a918f9dd58106e340817cbdd89
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d6bc30a519d7cae432bede0d2778a805102f7cbeae9a244275827f72067499b
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfe40af48d21f2cca8886bf415fbb1d77078d013ee962c12ffd6bac84ee28801
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d34cb07567bc31e1dbbaa78fe1d8a500cecd9a370ce5d35295b9afb817d2e6c
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 7250,
|
| 3 |
"best_metric": 4.4039154052734375,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1298,6 +1298,92 @@
|
|
| 1298 |
"eval_samples_per_second": 53.19,
|
| 1299 |
"eval_steps_per_second": 13.298,
|
| 1300 |
"step": 7500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1301 |
}
|
| 1302 |
],
|
| 1303 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 7250,
|
| 3 |
"best_metric": 4.4039154052734375,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
|
| 5 |
+
"epoch": 0.6153372817475579,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 8000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1298 |
"eval_samples_per_second": 53.19,
|
| 1299 |
"eval_steps_per_second": 13.298,
|
| 1300 |
"step": 7500
|
| 1301 |
+
},
|
| 1302 |
+
{
|
| 1303 |
+
"epoch": 0.5807245596492577,
|
| 1304 |
+
"grad_norm": 3.4961936473846436,
|
| 1305 |
+
"learning_rate": 0.00010170214330355501,
|
| 1306 |
+
"loss": 4.5194,
|
| 1307 |
+
"step": 7550
|
| 1308 |
+
},
|
| 1309 |
+
{
|
| 1310 |
+
"epoch": 0.58457041766018,
|
| 1311 |
+
"grad_norm": 2.529500961303711,
|
| 1312 |
+
"learning_rate": 0.00010144479982353589,
|
| 1313 |
+
"loss": 4.3337,
|
| 1314 |
+
"step": 7600
|
| 1315 |
+
},
|
| 1316 |
+
{
|
| 1317 |
+
"epoch": 0.5884162756711022,
|
| 1318 |
+
"grad_norm": 3.346160888671875,
|
| 1319 |
+
"learning_rate": 0.00010118745634351679,
|
| 1320 |
+
"loss": 4.5422,
|
| 1321 |
+
"step": 7650
|
| 1322 |
+
},
|
| 1323 |
+
{
|
| 1324 |
+
"epoch": 0.5922621336820244,
|
| 1325 |
+
"grad_norm": 3.8311049938201904,
|
| 1326 |
+
"learning_rate": 0.00010093011286349765,
|
| 1327 |
+
"loss": 4.4191,
|
| 1328 |
+
"step": 7700
|
| 1329 |
+
},
|
| 1330 |
+
{
|
| 1331 |
+
"epoch": 0.5961079916929467,
|
| 1332 |
+
"grad_norm": 4.324901580810547,
|
| 1333 |
+
"learning_rate": 0.00010067276938347853,
|
| 1334 |
+
"loss": 4.4613,
|
| 1335 |
+
"step": 7750
|
| 1336 |
+
},
|
| 1337 |
+
{
|
| 1338 |
+
"epoch": 0.5961079916929467,
|
| 1339 |
+
"eval_loss": 4.4118547439575195,
|
| 1340 |
+
"eval_runtime": 18.9517,
|
| 1341 |
+
"eval_samples_per_second": 52.766,
|
| 1342 |
+
"eval_steps_per_second": 13.191,
|
| 1343 |
+
"step": 7750
|
| 1344 |
+
},
|
| 1345 |
+
{
|
| 1346 |
+
"epoch": 0.5999538497038689,
|
| 1347 |
+
"grad_norm": 3.888192653656006,
|
| 1348 |
+
"learning_rate": 0.00010041542590345943,
|
| 1349 |
+
"loss": 4.5492,
|
| 1350 |
+
"step": 7800
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"epoch": 0.6037997077147912,
|
| 1354 |
+
"grad_norm": 2.718320608139038,
|
| 1355 |
+
"learning_rate": 0.0001001580824234403,
|
| 1356 |
+
"loss": 4.5371,
|
| 1357 |
+
"step": 7850
|
| 1358 |
+
},
|
| 1359 |
+
{
|
| 1360 |
+
"epoch": 0.6076455657257134,
|
| 1361 |
+
"grad_norm": 3.5970869064331055,
|
| 1362 |
+
"learning_rate": 9.99007389434212e-05,
|
| 1363 |
+
"loss": 4.4835,
|
| 1364 |
+
"step": 7900
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"epoch": 0.6114914237366357,
|
| 1368 |
+
"grad_norm": 4.563399314880371,
|
| 1369 |
+
"learning_rate": 9.964339546340208e-05,
|
| 1370 |
+
"loss": 4.4494,
|
| 1371 |
+
"step": 7950
|
| 1372 |
+
},
|
| 1373 |
+
{
|
| 1374 |
+
"epoch": 0.6153372817475579,
|
| 1375 |
+
"grad_norm": 5.080177307128906,
|
| 1376 |
+
"learning_rate": 9.938605198338294e-05,
|
| 1377 |
+
"loss": 4.6072,
|
| 1378 |
+
"step": 8000
|
| 1379 |
+
},
|
| 1380 |
+
{
|
| 1381 |
+
"epoch": 0.6153372817475579,
|
| 1382 |
+
"eval_loss": 4.428142547607422,
|
| 1383 |
+
"eval_runtime": 18.8815,
|
| 1384 |
+
"eval_samples_per_second": 52.962,
|
| 1385 |
+
"eval_steps_per_second": 13.241,
|
| 1386 |
+
"step": 8000
|
| 1387 |
}
|
| 1388 |
],
|
| 1389 |
"logging_steps": 50,
|