Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +103 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ddeabb9ead685c5e5b416b4a981e11a787d94773db5c89384835f8ea6b2e1c4
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98dacb4579cc2ad8d273f8bac5c3977b66490dafa2ef002d312807df6670d4cd
|
| 3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79b99893c9b02fa371856f47a0ec288962436c435769873b9db7532898348d23
|
| 3 |
size 15523
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56e8d48d0939e6a173efa67076df748028040c8480b0133ac53f27544c88363a
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71d5e106c9d23676ccfa26cba844ba11ee123c667bd6da5c807ecc94bb13e886
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1506,11 +1506,111 @@
|
|
| 1506 |
"eval_samples_per_second": 21.395,
|
| 1507 |
"eval_steps_per_second": 0.685,
|
| 1508 |
"step": 75000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1509 |
}
|
| 1510 |
],
|
| 1511 |
"max_steps": 1000000,
|
| 1512 |
"num_train_epochs": 86,
|
| 1513 |
-
"total_flos": 3.
|
| 1514 |
"trial_name": null,
|
| 1515 |
"trial_params": null
|
| 1516 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.805035726437564,
|
| 5 |
+
"global_step": 80000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1506 |
"eval_samples_per_second": 21.395,
|
| 1507 |
"eval_steps_per_second": 0.685,
|
| 1508 |
"step": 75000
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"epoch": 6.42,
|
| 1512 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1513 |
+
"loss": 0.406,
|
| 1514 |
+
"step": 75500
|
| 1515 |
+
},
|
| 1516 |
+
{
|
| 1517 |
+
"epoch": 6.46,
|
| 1518 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1519 |
+
"loss": 0.4054,
|
| 1520 |
+
"step": 76000
|
| 1521 |
+
},
|
| 1522 |
+
{
|
| 1523 |
+
"epoch": 6.46,
|
| 1524 |
+
"eval_loss": 0.3762701749801636,
|
| 1525 |
+
"eval_runtime": 15.2662,
|
| 1526 |
+
"eval_samples_per_second": 32.752,
|
| 1527 |
+
"eval_steps_per_second": 1.048,
|
| 1528 |
+
"step": 76000
|
| 1529 |
+
},
|
| 1530 |
+
{
|
| 1531 |
+
"epoch": 6.51,
|
| 1532 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1533 |
+
"loss": 0.4047,
|
| 1534 |
+
"step": 76500
|
| 1535 |
+
},
|
| 1536 |
+
{
|
| 1537 |
+
"epoch": 6.55,
|
| 1538 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1539 |
+
"loss": 0.4043,
|
| 1540 |
+
"step": 77000
|
| 1541 |
+
},
|
| 1542 |
+
{
|
| 1543 |
+
"epoch": 6.55,
|
| 1544 |
+
"eval_loss": 0.3773665130138397,
|
| 1545 |
+
"eval_runtime": 23.0339,
|
| 1546 |
+
"eval_samples_per_second": 21.707,
|
| 1547 |
+
"eval_steps_per_second": 0.695,
|
| 1548 |
+
"step": 77000
|
| 1549 |
+
},
|
| 1550 |
+
{
|
| 1551 |
+
"epoch": 6.59,
|
| 1552 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1553 |
+
"loss": 0.4041,
|
| 1554 |
+
"step": 77500
|
| 1555 |
+
},
|
| 1556 |
+
{
|
| 1557 |
+
"epoch": 6.63,
|
| 1558 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1559 |
+
"loss": 0.4044,
|
| 1560 |
+
"step": 78000
|
| 1561 |
+
},
|
| 1562 |
+
{
|
| 1563 |
+
"epoch": 6.63,
|
| 1564 |
+
"eval_loss": 0.3738757371902466,
|
| 1565 |
+
"eval_runtime": 16.5496,
|
| 1566 |
+
"eval_samples_per_second": 30.212,
|
| 1567 |
+
"eval_steps_per_second": 0.967,
|
| 1568 |
+
"step": 78000
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"epoch": 6.68,
|
| 1572 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1573 |
+
"loss": 0.4038,
|
| 1574 |
+
"step": 78500
|
| 1575 |
+
},
|
| 1576 |
+
{
|
| 1577 |
+
"epoch": 6.72,
|
| 1578 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1579 |
+
"loss": 0.4038,
|
| 1580 |
+
"step": 79000
|
| 1581 |
+
},
|
| 1582 |
+
{
|
| 1583 |
+
"epoch": 6.72,
|
| 1584 |
+
"eval_loss": 0.37452879548072815,
|
| 1585 |
+
"eval_runtime": 16.7684,
|
| 1586 |
+
"eval_samples_per_second": 29.818,
|
| 1587 |
+
"eval_steps_per_second": 0.954,
|
| 1588 |
+
"step": 79000
|
| 1589 |
+
},
|
| 1590 |
+
{
|
| 1591 |
+
"epoch": 6.76,
|
| 1592 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1593 |
+
"loss": 0.4039,
|
| 1594 |
+
"step": 79500
|
| 1595 |
+
},
|
| 1596 |
+
{
|
| 1597 |
+
"epoch": 6.81,
|
| 1598 |
+
"learning_rate": 9.999999999999999e-06,
|
| 1599 |
+
"loss": 0.4045,
|
| 1600 |
+
"step": 80000
|
| 1601 |
+
},
|
| 1602 |
+
{
|
| 1603 |
+
"epoch": 6.81,
|
| 1604 |
+
"eval_loss": 0.3761942684650421,
|
| 1605 |
+
"eval_runtime": 16.6694,
|
| 1606 |
+
"eval_samples_per_second": 29.995,
|
| 1607 |
+
"eval_steps_per_second": 0.96,
|
| 1608 |
+
"step": 80000
|
| 1609 |
}
|
| 1610 |
],
|
| 1611 |
"max_steps": 1000000,
|
| 1612 |
"num_train_epochs": 86,
|
| 1613 |
+
"total_flos": 3.680166814926219e+21,
|
| 1614 |
"trial_name": null,
|
| 1615 |
"trial_params": null
|
| 1616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98dacb4579cc2ad8d273f8bac5c3977b66490dafa2ef002d312807df6670d4cd
|
| 3 |
size 449471589
|