Training in progress, step 1650, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e34a2741903c7131a591ab1ea100bd60a9f729205b2327f5d897a43dd1a350df
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8b868ab44d14f751a290f2fb9a43b0004c429bf63a62d6da5cdde1046626611
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:001785b24c383f1dc5e05ca97682558022e868af635239d8c60b6646c2c21747
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0ba921a18d46c557f13f21f43adf78f0e35b0b4cbde5268f7f2125015b3077
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166430950164795,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1457,6 +1457,151 @@
|
|
| 1457 |
"EMA_steps_per_second": 24.774,
|
| 1458 |
"epoch": 65.21739130434783,
|
| 1459 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
}
|
| 1461 |
],
|
| 1462 |
"logging_steps": 10,
|
|
@@ -1476,7 +1621,7 @@
|
|
| 1476 |
"attributes": {}
|
| 1477 |
}
|
| 1478 |
},
|
| 1479 |
-
"total_flos":
|
| 1480 |
"train_batch_size": 4,
|
| 1481 |
"trial_name": null,
|
| 1482 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166430950164795,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 71.73913043478261,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 1650,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1457 |
"EMA_steps_per_second": 24.774,
|
| 1458 |
"epoch": 65.21739130434783,
|
| 1459 |
"step": 1500
|
| 1460 |
+
},
|
| 1461 |
+
{
|
| 1462 |
+
"epoch": 65.65217391304348,
|
| 1463 |
+
"grad_norm": 2.2542121410369873,
|
| 1464 |
+
"learning_rate": 1.4981367287650419e-05,
|
| 1465 |
+
"loss": 0.3164,
|
| 1466 |
+
"step": 1510
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"epoch": 66.08695652173913,
|
| 1470 |
+
"grad_norm": 1.7643301486968994,
|
| 1471 |
+
"learning_rate": 1.4981303451854267e-05,
|
| 1472 |
+
"loss": 0.2947,
|
| 1473 |
+
"step": 1520
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"epoch": 66.52173913043478,
|
| 1477 |
+
"grad_norm": 1.7471901178359985,
|
| 1478 |
+
"learning_rate": 1.4981236647145501e-05,
|
| 1479 |
+
"loss": 0.3103,
|
| 1480 |
+
"step": 1530
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 66.95652173913044,
|
| 1484 |
+
"grad_norm": 2.057833194732666,
|
| 1485 |
+
"learning_rate": 1.4981166873550601e-05,
|
| 1486 |
+
"loss": 0.3051,
|
| 1487 |
+
"step": 1540
|
| 1488 |
+
},
|
| 1489 |
+
{
|
| 1490 |
+
"epoch": 67.3913043478261,
|
| 1491 |
+
"grad_norm": 1.7425355911254883,
|
| 1492 |
+
"learning_rate": 1.4981094131097224e-05,
|
| 1493 |
+
"loss": 0.2713,
|
| 1494 |
+
"step": 1550
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"epoch": 67.82608695652173,
|
| 1498 |
+
"grad_norm": 2.050690174102783,
|
| 1499 |
+
"learning_rate": 1.49810184198142e-05,
|
| 1500 |
+
"loss": 0.3439,
|
| 1501 |
+
"step": 1560
|
| 1502 |
+
},
|
| 1503 |
+
{
|
| 1504 |
+
"epoch": 68.26086956521739,
|
| 1505 |
+
"grad_norm": 2.0778491497039795,
|
| 1506 |
+
"learning_rate": 1.498093973973154e-05,
|
| 1507 |
+
"loss": 0.2503,
|
| 1508 |
+
"step": 1570
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"epoch": 68.69565217391305,
|
| 1512 |
+
"grad_norm": 1.8078017234802246,
|
| 1513 |
+
"learning_rate": 1.4980858090880429e-05,
|
| 1514 |
+
"loss": 0.2862,
|
| 1515 |
+
"step": 1580
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 69.1304347826087,
|
| 1519 |
+
"grad_norm": 1.9451018571853638,
|
| 1520 |
+
"learning_rate": 1.4980773473293232e-05,
|
| 1521 |
+
"loss": 0.368,
|
| 1522 |
+
"step": 1590
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 69.56521739130434,
|
| 1526 |
+
"grad_norm": 1.9795953035354614,
|
| 1527 |
+
"learning_rate": 1.4980685887003486e-05,
|
| 1528 |
+
"loss": 0.3073,
|
| 1529 |
+
"step": 1600
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 70.0,
|
| 1533 |
+
"grad_norm": 1.6645371913909912,
|
| 1534 |
+
"learning_rate": 1.498059533204591e-05,
|
| 1535 |
+
"loss": 0.2691,
|
| 1536 |
+
"step": 1610
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 70.43478260869566,
|
| 1540 |
+
"grad_norm": 2.21379017829895,
|
| 1541 |
+
"learning_rate": 1.4980501808456398e-05,
|
| 1542 |
+
"loss": 0.3142,
|
| 1543 |
+
"step": 1620
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 70.8695652173913,
|
| 1547 |
+
"grad_norm": 1.9500844478607178,
|
| 1548 |
+
"learning_rate": 1.4980405316272018e-05,
|
| 1549 |
+
"loss": 0.2996,
|
| 1550 |
+
"step": 1630
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 71.30434782608695,
|
| 1554 |
+
"grad_norm": 2.359870195388794,
|
| 1555 |
+
"learning_rate": 1.4980305855531015e-05,
|
| 1556 |
+
"loss": 0.2888,
|
| 1557 |
+
"step": 1640
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"epoch": 71.73913043478261,
|
| 1561 |
+
"grad_norm": 1.8895881175994873,
|
| 1562 |
+
"learning_rate": 1.4980203426272815e-05,
|
| 1563 |
+
"loss": 0.2624,
|
| 1564 |
+
"step": 1650
|
| 1565 |
+
},
|
| 1566 |
+
{
|
| 1567 |
+
"epoch": 71.73913043478261,
|
| 1568 |
+
"eval_loss": 0.847686767578125,
|
| 1569 |
+
"eval_runtime": 0.5359,
|
| 1570 |
+
"eval_samples_per_second": 18.659,
|
| 1571 |
+
"eval_steps_per_second": 18.659,
|
| 1572 |
+
"step": 1650
|
| 1573 |
+
},
|
| 1574 |
+
{
|
| 1575 |
+
"Start_State_loss": 0.8601926565170288,
|
| 1576 |
+
"Start_State_runtime": 0.3989,
|
| 1577 |
+
"Start_State_samples_per_second": 25.067,
|
| 1578 |
+
"Start_State_steps_per_second": 25.067,
|
| 1579 |
+
"epoch": 71.73913043478261,
|
| 1580 |
+
"step": 1650
|
| 1581 |
+
},
|
| 1582 |
+
{
|
| 1583 |
+
"Raw_Model_loss": 0.847686767578125,
|
| 1584 |
+
"Raw_Model_runtime": 0.4133,
|
| 1585 |
+
"Raw_Model_samples_per_second": 24.198,
|
| 1586 |
+
"Raw_Model_steps_per_second": 24.198,
|
| 1587 |
+
"epoch": 71.73913043478261,
|
| 1588 |
+
"step": 1650
|
| 1589 |
+
},
|
| 1590 |
+
{
|
| 1591 |
+
"SWA_loss": 0.7314801216125488,
|
| 1592 |
+
"SWA_runtime": 0.3914,
|
| 1593 |
+
"SWA_samples_per_second": 25.548,
|
| 1594 |
+
"SWA_steps_per_second": 25.548,
|
| 1595 |
+
"epoch": 71.73913043478261,
|
| 1596 |
+
"step": 1650
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"EMA_loss": 0.8605908155441284,
|
| 1600 |
+
"EMA_runtime": 0.3897,
|
| 1601 |
+
"EMA_samples_per_second": 25.662,
|
| 1602 |
+
"EMA_steps_per_second": 25.662,
|
| 1603 |
+
"epoch": 71.73913043478261,
|
| 1604 |
+
"step": 1650
|
| 1605 |
}
|
| 1606 |
],
|
| 1607 |
"logging_steps": 10,
|
|
|
|
| 1621 |
"attributes": {}
|
| 1622 |
}
|
| 1623 |
},
|
| 1624 |
+
"total_flos": 4.242701339976499e+16,
|
| 1625 |
"train_batch_size": 4,
|
| 1626 |
"trial_name": null,
|
| 1627 |
"trial_params": null
|