Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eecb66b5b687b2480b18fa49a3a7093840558d372a33d29f18cc9ddc9d5973f6
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:246b4fcf33df56c5b498c44dfc6d12184de263d7f241a380037b6387910c9a75
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b68f148982346537acf196edf0aa44542990dee8efc3893aa00dae2ca2e993b5
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83c5d85e32786b1e35a549f6e7bfc25b63f0617678a2a77d09f7e94475702a2a
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9af065edeaca88f16d8fcf52e4c33f05f28955d2f4f45d569975711d2168cd7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 8500,
|
| 3 |
"best_metric": 4.392988204956055,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1470,6 +1470,92 @@
|
|
| 1470 |
"eval_samples_per_second": 53.079,
|
| 1471 |
"eval_steps_per_second": 13.27,
|
| 1472 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1473 |
}
|
| 1474 |
],
|
| 1475 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 8500,
|
| 3 |
"best_metric": 4.392988204956055,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
|
| 5 |
+
"epoch": 0.6922544419660026,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 9000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1470 |
"eval_samples_per_second": 53.079,
|
| 1471 |
"eval_steps_per_second": 13.27,
|
| 1472 |
"step": 8500
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.6576417198677025,
|
| 1476 |
+
"grad_norm": 2.6913883686065674,
|
| 1477 |
+
"learning_rate": 9.656042057277304e-05,
|
| 1478 |
+
"loss": 4.4409,
|
| 1479 |
+
"step": 8550
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.6614875778786247,
|
| 1483 |
+
"grad_norm": 3.749894618988037,
|
| 1484 |
+
"learning_rate": 9.630307709275394e-05,
|
| 1485 |
+
"loss": 4.5101,
|
| 1486 |
+
"step": 8600
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.665333435889547,
|
| 1490 |
+
"grad_norm": 4.93977165222168,
|
| 1491 |
+
"learning_rate": 9.604573361273482e-05,
|
| 1492 |
+
"loss": 4.4504,
|
| 1493 |
+
"step": 8650
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.6691792939004692,
|
| 1497 |
+
"grad_norm": 4.311313152313232,
|
| 1498 |
+
"learning_rate": 9.578839013271571e-05,
|
| 1499 |
+
"loss": 4.4857,
|
| 1500 |
+
"step": 8700
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.6730251519113915,
|
| 1504 |
+
"grad_norm": 3.646656036376953,
|
| 1505 |
+
"learning_rate": 9.553104665269659e-05,
|
| 1506 |
+
"loss": 4.387,
|
| 1507 |
+
"step": 8750
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.6730251519113915,
|
| 1511 |
+
"eval_loss": 4.401506423950195,
|
| 1512 |
+
"eval_runtime": 18.7931,
|
| 1513 |
+
"eval_samples_per_second": 53.211,
|
| 1514 |
+
"eval_steps_per_second": 13.303,
|
| 1515 |
+
"step": 8750
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 0.6768710099223136,
|
| 1519 |
+
"grad_norm": 4.352843284606934,
|
| 1520 |
+
"learning_rate": 9.527370317267746e-05,
|
| 1521 |
+
"loss": 4.5279,
|
| 1522 |
+
"step": 8800
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 0.6807168679332359,
|
| 1526 |
+
"grad_norm": 3.890216827392578,
|
| 1527 |
+
"learning_rate": 9.501635969265835e-05,
|
| 1528 |
+
"loss": 4.4485,
|
| 1529 |
+
"step": 8850
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 0.6845627259441581,
|
| 1533 |
+
"grad_norm": 3.4119713306427,
|
| 1534 |
+
"learning_rate": 9.475901621263923e-05,
|
| 1535 |
+
"loss": 4.4428,
|
| 1536 |
+
"step": 8900
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 0.6884085839550804,
|
| 1540 |
+
"grad_norm": 7.813595294952393,
|
| 1541 |
+
"learning_rate": 9.450167273262012e-05,
|
| 1542 |
+
"loss": 4.3308,
|
| 1543 |
+
"step": 8950
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 0.6922544419660026,
|
| 1547 |
+
"grad_norm": 3.079829692840576,
|
| 1548 |
+
"learning_rate": 9.4244329252601e-05,
|
| 1549 |
+
"loss": 4.368,
|
| 1550 |
+
"step": 9000
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 0.6922544419660026,
|
| 1554 |
+
"eval_loss": 4.393312931060791,
|
| 1555 |
+
"eval_runtime": 18.7727,
|
| 1556 |
+
"eval_samples_per_second": 53.269,
|
| 1557 |
+
"eval_steps_per_second": 13.317,
|
| 1558 |
+
"step": 9000
|
| 1559 |
}
|
| 1560 |
],
|
| 1561 |
"logging_steps": 50,
|