Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d41b0a6b7b93326606bd1adb6fe554c4f3a4896093e8d2cda0d85615659fbea9
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61e6388870be8d5091de8ee4171a40d8d34fd90416acec89a4530ab1810d1d11
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8114c7619958e289ae818af20bbbb40ca496ec99d1d2cf5336f332be768bd676
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f82ab7544ab0f2c7299f888437f6d772f90becf0f776876608740887a96023f
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c86e9949dede8e9039d4479cc9a7b6de2de62acc5bc9bce167cc78f0df5d789
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67134d509d59e4154961ca89be855a6a8dd7ecee21023e7a214d3f3e40df8ac6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-8500",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1470,6 +1470,92 @@
|
|
| 1470 |
"eval_samples_per_second": 58.954,
|
| 1471 |
"eval_steps_per_second": 14.739,
|
| 1472 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1473 |
}
|
| 1474 |
],
|
| 1475 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 8750,
|
| 3 |
+
"best_metric": 1.3920339345932007,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-8500",
|
| 5 |
+
"epoch": 0.6922544419660026,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 9000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1470 |
"eval_samples_per_second": 58.954,
|
| 1471 |
"eval_steps_per_second": 14.739,
|
| 1472 |
"step": 8500
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.6576417198677025,
|
| 1476 |
+
"grad_norm": 0.8412348628044128,
|
| 1477 |
+
"learning_rate": 0.00015821624289016442,
|
| 1478 |
+
"loss": 1.4098,
|
| 1479 |
+
"step": 8550
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.6614875778786247,
|
| 1483 |
+
"grad_norm": 1.3487842082977295,
|
| 1484 |
+
"learning_rate": 0.00015795652286834793,
|
| 1485 |
+
"loss": 1.4095,
|
| 1486 |
+
"step": 8600
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.665333435889547,
|
| 1490 |
+
"grad_norm": 1.1506551504135132,
|
| 1491 |
+
"learning_rate": 0.00015769680284653145,
|
| 1492 |
+
"loss": 1.4009,
|
| 1493 |
+
"step": 8650
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.6691792939004692,
|
| 1497 |
+
"grad_norm": 1.1333333253860474,
|
| 1498 |
+
"learning_rate": 0.00015743708282471496,
|
| 1499 |
+
"loss": 1.4042,
|
| 1500 |
+
"step": 8700
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.6730251519113915,
|
| 1504 |
+
"grad_norm": 1.1276965141296387,
|
| 1505 |
+
"learning_rate": 0.00015717736280289847,
|
| 1506 |
+
"loss": 1.4143,
|
| 1507 |
+
"step": 8750
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.6730251519113915,
|
| 1511 |
+
"eval_loss": 1.3920339345932007,
|
| 1512 |
+
"eval_runtime": 16.7893,
|
| 1513 |
+
"eval_samples_per_second": 59.562,
|
| 1514 |
+
"eval_steps_per_second": 14.89,
|
| 1515 |
+
"step": 8750
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 0.6768710099223136,
|
| 1519 |
+
"grad_norm": 1.3561677932739258,
|
| 1520 |
+
"learning_rate": 0.000156917642781082,
|
| 1521 |
+
"loss": 1.4228,
|
| 1522 |
+
"step": 8800
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 0.6807168679332359,
|
| 1526 |
+
"grad_norm": 1.4107307195663452,
|
| 1527 |
+
"learning_rate": 0.00015665792275926552,
|
| 1528 |
+
"loss": 1.4227,
|
| 1529 |
+
"step": 8850
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 0.6845627259441581,
|
| 1533 |
+
"grad_norm": 1.4296494722366333,
|
| 1534 |
+
"learning_rate": 0.00015639820273744904,
|
| 1535 |
+
"loss": 1.3846,
|
| 1536 |
+
"step": 8900
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 0.6884085839550804,
|
| 1540 |
+
"grad_norm": 1.9556208848953247,
|
| 1541 |
+
"learning_rate": 0.00015613848271563255,
|
| 1542 |
+
"loss": 1.3799,
|
| 1543 |
+
"step": 8950
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 0.6922544419660026,
|
| 1547 |
+
"grad_norm": 0.8003421425819397,
|
| 1548 |
+
"learning_rate": 0.00015587876269381606,
|
| 1549 |
+
"loss": 1.3924,
|
| 1550 |
+
"step": 9000
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 0.6922544419660026,
|
| 1554 |
+
"eval_loss": 1.4001529216766357,
|
| 1555 |
+
"eval_runtime": 16.7639,
|
| 1556 |
+
"eval_samples_per_second": 59.652,
|
| 1557 |
+
"eval_steps_per_second": 14.913,
|
| 1558 |
+
"step": 9000
|
| 1559 |
}
|
| 1560 |
],
|
| 1561 |
"logging_steps": 50,
|