Training in progress, epoch 16, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e1ea03da8d9a978320d45f1bc6677407a85624af3d9baa7bae32e5c03676367
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b98a2483ec61025369cf6eb8fec5397cf636bfb0ffa7a3eedf987ef5b4b9d5c8
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffc97010f20f826b75fdc09ec365ad76a45dfcdc64194b72334d5902c2cf28eb
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5384c34df266d1db083f57452aa67b48a3012f0aeee7f4ad7194984e89d75fe
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 34.
|
| 3 |
-
"best_model_checkpoint": "/kaggle/working/output/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1492,6 +1492,105 @@
|
|
| 1492 |
"eval_samples_per_second": 26.507,
|
| 1493 |
"eval_steps_per_second": 3.331,
|
| 1494 |
"step": 19575
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1495 |
}
|
| 1496 |
],
|
| 1497 |
"logging_steps": 100,
|
|
@@ -1506,7 +1605,7 @@
|
|
| 1506 |
"early_stopping_threshold": 0.0
|
| 1507 |
},
|
| 1508 |
"attributes": {
|
| 1509 |
-
"early_stopping_patience_counter":
|
| 1510 |
}
|
| 1511 |
},
|
| 1512 |
"TrainerControl": {
|
|
@@ -1520,7 +1619,7 @@
|
|
| 1520 |
"attributes": {}
|
| 1521 |
}
|
| 1522 |
},
|
| 1523 |
-
"total_flos": 2.
|
| 1524 |
"train_batch_size": 8,
|
| 1525 |
"trial_name": null,
|
| 1526 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 34.54485321044922,
|
| 3 |
+
"best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
|
| 4 |
+
"epoch": 16.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 20880,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1492 |
"eval_samples_per_second": 26.507,
|
| 1493 |
"eval_steps_per_second": 3.331,
|
| 1494 |
"step": 19575
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"epoch": 15.019157088122606,
|
| 1498 |
+
"grad_norm": 3.291614294052124,
|
| 1499 |
+
"learning_rate": 4.061733716475096e-05,
|
| 1500 |
+
"loss": 32.9437,
|
| 1501 |
+
"step": 19600
|
| 1502 |
+
},
|
| 1503 |
+
{
|
| 1504 |
+
"epoch": 15.095785440613026,
|
| 1505 |
+
"grad_norm": 4.4670867919921875,
|
| 1506 |
+
"learning_rate": 4.056944444444445e-05,
|
| 1507 |
+
"loss": 33.6879,
|
| 1508 |
+
"step": 19700
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"epoch": 15.172413793103448,
|
| 1512 |
+
"grad_norm": 3.4122018814086914,
|
| 1513 |
+
"learning_rate": 4.0521551724137934e-05,
|
| 1514 |
+
"loss": 33.0167,
|
| 1515 |
+
"step": 19800
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 15.24904214559387,
|
| 1519 |
+
"grad_norm": 3.854083299636841,
|
| 1520 |
+
"learning_rate": 4.047365900383142e-05,
|
| 1521 |
+
"loss": 33.8342,
|
| 1522 |
+
"step": 19900
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 15.32567049808429,
|
| 1526 |
+
"grad_norm": 2.945396900177002,
|
| 1527 |
+
"learning_rate": 4.042576628352491e-05,
|
| 1528 |
+
"loss": 32.3812,
|
| 1529 |
+
"step": 20000
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 15.402298850574713,
|
| 1533 |
+
"grad_norm": 2.5246341228485107,
|
| 1534 |
+
"learning_rate": 4.0377873563218395e-05,
|
| 1535 |
+
"loss": 33.3573,
|
| 1536 |
+
"step": 20100
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 15.478927203065133,
|
| 1540 |
+
"grad_norm": 2.837134599685669,
|
| 1541 |
+
"learning_rate": 4.032998084291188e-05,
|
| 1542 |
+
"loss": 33.5981,
|
| 1543 |
+
"step": 20200
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 15.555555555555555,
|
| 1547 |
+
"grad_norm": 4.350450038909912,
|
| 1548 |
+
"learning_rate": 4.028208812260537e-05,
|
| 1549 |
+
"loss": 34.0699,
|
| 1550 |
+
"step": 20300
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 15.632183908045977,
|
| 1554 |
+
"grad_norm": 2.4908435344696045,
|
| 1555 |
+
"learning_rate": 4.0234195402298855e-05,
|
| 1556 |
+
"loss": 33.8105,
|
| 1557 |
+
"step": 20400
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"epoch": 15.708812260536398,
|
| 1561 |
+
"grad_norm": 2.9461615085601807,
|
| 1562 |
+
"learning_rate": 4.0186302681992336e-05,
|
| 1563 |
+
"loss": 33.3251,
|
| 1564 |
+
"step": 20500
|
| 1565 |
+
},
|
| 1566 |
+
{
|
| 1567 |
+
"epoch": 15.78544061302682,
|
| 1568 |
+
"grad_norm": 2.8716940879821777,
|
| 1569 |
+
"learning_rate": 4.013840996168582e-05,
|
| 1570 |
+
"loss": 33.7594,
|
| 1571 |
+
"step": 20600
|
| 1572 |
+
},
|
| 1573 |
+
{
|
| 1574 |
+
"epoch": 15.862068965517242,
|
| 1575 |
+
"grad_norm": 2.7166991233825684,
|
| 1576 |
+
"learning_rate": 4.009051724137931e-05,
|
| 1577 |
+
"loss": 33.58,
|
| 1578 |
+
"step": 20700
|
| 1579 |
+
},
|
| 1580 |
+
{
|
| 1581 |
+
"epoch": 15.938697318007662,
|
| 1582 |
+
"grad_norm": 2.2878618240356445,
|
| 1583 |
+
"learning_rate": 4.0042624521072796e-05,
|
| 1584 |
+
"loss": 33.4573,
|
| 1585 |
+
"step": 20800
|
| 1586 |
+
},
|
| 1587 |
+
{
|
| 1588 |
+
"epoch": 16.0,
|
| 1589 |
+
"eval_loss": 34.54485321044922,
|
| 1590 |
+
"eval_runtime": 49.3188,
|
| 1591 |
+
"eval_samples_per_second": 26.46,
|
| 1592 |
+
"eval_steps_per_second": 3.325,
|
| 1593 |
+
"step": 20880
|
| 1594 |
}
|
| 1595 |
],
|
| 1596 |
"logging_steps": 100,
|
|
|
|
| 1605 |
"early_stopping_threshold": 0.0
|
| 1606 |
},
|
| 1607 |
"attributes": {
|
| 1608 |
+
"early_stopping_patience_counter": 0
|
| 1609 |
}
|
| 1610 |
},
|
| 1611 |
"TrainerControl": {
|
|
|
|
| 1619 |
"attributes": {}
|
| 1620 |
}
|
| 1621 |
},
|
| 1622 |
+
"total_flos": 2.251697444875469e+16,
|
| 1623 |
"train_batch_size": 8,
|
| 1624 |
"trial_name": null,
|
| 1625 |
"trial_params": null
|