Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:242d478bdcce64189f3b58031019151e1a18c1af95ac728b5a747bf09e32e7a9
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2aac51b2ab82ac87e0ebdf687ae863be82703b3f31a5c8357ea90e2e967391af
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:743ddc3d3453dab54079df809921a1396d6ae9822548c4cca99b22b4380fa013
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b68f148982346537acf196edf0aa44542990dee8efc3893aa00dae2ca2e993b5
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fec606e444ea63edc032c8bc6a79a748b3d50629f93eb3c42816c2d5bd5ac94
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a408e7faa4c20c8bb6b17b32448bbaa3e8be76b1ace53db10640cd020e42083
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 8500,
|
| 3 |
"best_metric": 4.409055709838867,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-8500",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1470,6 +1470,92 @@
|
|
| 1470 |
"eval_samples_per_second": 53.78,
|
| 1471 |
"eval_steps_per_second": 13.445,
|
| 1472 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1473 |
}
|
| 1474 |
],
|
| 1475 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 8500,
|
| 3 |
"best_metric": 4.409055709838867,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-8500",
|
| 5 |
+
"epoch": 0.6922544419660026,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 9000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1470 |
"eval_samples_per_second": 53.78,
|
| 1471 |
"eval_steps_per_second": 13.445,
|
| 1472 |
"step": 8500
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.6576417198677025,
|
| 1476 |
+
"grad_norm": 2.7313661575317383,
|
| 1477 |
+
"learning_rate": 0.00015823702049190975,
|
| 1478 |
+
"loss": 4.4721,
|
| 1479 |
+
"step": 8550
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.6614875778786247,
|
| 1483 |
+
"grad_norm": 4.160475730895996,
|
| 1484 |
+
"learning_rate": 0.00015797730047009326,
|
| 1485 |
+
"loss": 4.501,
|
| 1486 |
+
"step": 8600
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.665333435889547,
|
| 1490 |
+
"grad_norm": 11.54045581817627,
|
| 1491 |
+
"learning_rate": 0.00015771758044827675,
|
| 1492 |
+
"loss": 4.4433,
|
| 1493 |
+
"step": 8650
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.6691792939004692,
|
| 1497 |
+
"grad_norm": 4.087617874145508,
|
| 1498 |
+
"learning_rate": 0.00015745786042646029,
|
| 1499 |
+
"loss": 4.4981,
|
| 1500 |
+
"step": 8700
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.6730251519113915,
|
| 1504 |
+
"grad_norm": 4.155121803283691,
|
| 1505 |
+
"learning_rate": 0.0001571981404046438,
|
| 1506 |
+
"loss": 4.3874,
|
| 1507 |
+
"step": 8750
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.6730251519113915,
|
| 1511 |
+
"eval_loss": 4.418811321258545,
|
| 1512 |
+
"eval_runtime": 18.6306,
|
| 1513 |
+
"eval_samples_per_second": 53.675,
|
| 1514 |
+
"eval_steps_per_second": 13.419,
|
| 1515 |
+
"step": 8750
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 0.6768710099223136,
|
| 1519 |
+
"grad_norm": 4.071916580200195,
|
| 1520 |
+
"learning_rate": 0.0001569384203828273,
|
| 1521 |
+
"loss": 4.5531,
|
| 1522 |
+
"step": 8800
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 0.6807168679332359,
|
| 1526 |
+
"grad_norm": 3.395460605621338,
|
| 1527 |
+
"learning_rate": 0.00015667870036101085,
|
| 1528 |
+
"loss": 4.4609,
|
| 1529 |
+
"step": 8850
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 0.6845627259441581,
|
| 1533 |
+
"grad_norm": 3.4933230876922607,
|
| 1534 |
+
"learning_rate": 0.00015641898033919436,
|
| 1535 |
+
"loss": 4.4536,
|
| 1536 |
+
"step": 8900
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 0.6884085839550804,
|
| 1540 |
+
"grad_norm": 6.921072483062744,
|
| 1541 |
+
"learning_rate": 0.00015615926031737788,
|
| 1542 |
+
"loss": 4.3478,
|
| 1543 |
+
"step": 8950
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 0.6922544419660026,
|
| 1547 |
+
"grad_norm": 3.920626401901245,
|
| 1548 |
+
"learning_rate": 0.0001558995402955614,
|
| 1549 |
+
"loss": 4.3761,
|
| 1550 |
+
"step": 9000
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 0.6922544419660026,
|
| 1554 |
+
"eval_loss": 4.415992259979248,
|
| 1555 |
+
"eval_runtime": 18.5147,
|
| 1556 |
+
"eval_samples_per_second": 54.011,
|
| 1557 |
+
"eval_steps_per_second": 13.503,
|
| 1558 |
+
"step": 9000
|
| 1559 |
}
|
| 1560 |
],
|
| 1561 |
"logging_steps": 50,
|