3N3G committed on
Commit
fa1e303
·
verified ·
1 Parent(s): 6f330d3

Training in progress, step 224, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50a7626c3332382c720b25d7028428e6e693206a85b1d278123f350e6447c549
3
  size 4969539560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f766264a80af3c2e0386eedf3905edbf56634837a038ce95c6038d7405eedfe
3
  size 4969539560
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:674ac2c674747082527a37e7013363c3374ff004d5b78edf91c3585792370cd4
3
  size 1912795688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e24e7e534a14d518b12200bfaba3ba2cedbbafce9b0fbda9c2aca6057ce604
3
  size 1912795688
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 52.0,
6
  "eval_steps": 16,
7
- "global_step": 208,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1568,6 +1568,126 @@
1568
  "eval_samples_per_second": 18.8,
1569
  "eval_steps_per_second": 18.8,
1570
  "step": 208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1571
  }
1572
  ],
1573
  "logging_steps": 1,
@@ -1587,7 +1707,7 @@
1587
  "attributes": {}
1588
  }
1589
  },
1590
- "total_flos": 6.975970872459264e+16,
1591
  "train_batch_size": 1,
1592
  "trial_name": null,
1593
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 56.0,
6
  "eval_steps": 16,
7
+ "global_step": 224,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1568
  "eval_samples_per_second": 18.8,
1569
  "eval_steps_per_second": 18.8,
1570
  "step": 208
1571
+ },
1572
+ {
1573
+ "epoch": 52.29090909090909,
1574
+ "grad_norm": 3.8842809200286865,
1575
+ "learning_rate": 5.970378084704441e-08,
1576
+ "loss": 0.6428,
1577
+ "step": 209
1578
+ },
1579
+ {
1580
+ "epoch": 52.58181818181818,
1581
+ "grad_norm": 4.9067301750183105,
1582
+ "learning_rate": 5.931305886341008e-08,
1583
+ "loss": 0.7572,
1584
+ "step": 210
1585
+ },
1586
+ {
1587
+ "epoch": 52.872727272727275,
1588
+ "grad_norm": 4.025907516479492,
1589
+ "learning_rate": 5.892200842364462e-08,
1590
+ "loss": 0.6545,
1591
+ "step": 211
1592
+ },
1593
+ {
1594
+ "epoch": 53.0,
1595
+ "grad_norm": 4.105547904968262,
1596
+ "learning_rate": 5.853065930775303e-08,
1597
+ "loss": 0.6439,
1598
+ "step": 212
1599
+ },
1600
+ {
1601
+ "epoch": 53.29090909090909,
1602
+ "grad_norm": 3.7520296573638916,
1603
+ "learning_rate": 5.813904131848564e-08,
1604
+ "loss": 0.677,
1605
+ "step": 213
1606
+ },
1607
+ {
1608
+ "epoch": 53.58181818181818,
1609
+ "grad_norm": 3.975045680999756,
1610
+ "learning_rate": 5.7747184279068564e-08,
1611
+ "loss": 0.6321,
1612
+ "step": 214
1613
+ },
1614
+ {
1615
+ "epoch": 53.872727272727275,
1616
+ "grad_norm": 4.536473274230957,
1617
+ "learning_rate": 5.735511803093248e-08,
1618
+ "loss": 0.7326,
1619
+ "step": 215
1620
+ },
1621
+ {
1622
+ "epoch": 54.0,
1623
+ "grad_norm": 5.148712158203125,
1624
+ "learning_rate": 5.696287243144012e-08,
1625
+ "loss": 0.6819,
1626
+ "step": 216
1627
+ },
1628
+ {
1629
+ "epoch": 54.29090909090909,
1630
+ "grad_norm": 3.6721999645233154,
1631
+ "learning_rate": 5.6570477351612554e-08,
1632
+ "loss": 0.6655,
1633
+ "step": 217
1634
+ },
1635
+ {
1636
+ "epoch": 54.58181818181818,
1637
+ "grad_norm": 4.29323148727417,
1638
+ "learning_rate": 5.61779626738543e-08,
1639
+ "loss": 0.6743,
1640
+ "step": 218
1641
+ },
1642
+ {
1643
+ "epoch": 54.872727272727275,
1644
+ "grad_norm": 4.018572807312012,
1645
+ "learning_rate": 5.5785358289677765e-08,
1646
+ "loss": 0.711,
1647
+ "step": 219
1648
+ },
1649
+ {
1650
+ "epoch": 55.0,
1651
+ "grad_norm": 4.6550445556640625,
1652
+ "learning_rate": 5.539269409742683e-08,
1653
+ "loss": 0.6398,
1654
+ "step": 220
1655
+ },
1656
+ {
1657
+ "epoch": 55.29090909090909,
1658
+ "grad_norm": 4.599621295928955,
1659
+ "learning_rate": 5.5e-08,
1660
+ "loss": 0.6885,
1661
+ "step": 221
1662
+ },
1663
+ {
1664
+ "epoch": 55.58181818181818,
1665
+ "grad_norm": 3.6876866817474365,
1666
+ "learning_rate": 5.460730590257318e-08,
1667
+ "loss": 0.6391,
1668
+ "step": 222
1669
+ },
1670
+ {
1671
+ "epoch": 55.872727272727275,
1672
+ "grad_norm": 3.641345262527466,
1673
+ "learning_rate": 5.421464171032224e-08,
1674
+ "loss": 0.6684,
1675
+ "step": 223
1676
+ },
1677
+ {
1678
+ "epoch": 56.0,
1679
+ "grad_norm": 4.325244903564453,
1680
+ "learning_rate": 5.382203732614572e-08,
1681
+ "loss": 0.7467,
1682
+ "step": 224
1683
+ },
1684
+ {
1685
+ "epoch": 56.0,
1686
+ "eval_loss": 0.6532977819442749,
1687
+ "eval_runtime": 0.746,
1688
+ "eval_samples_per_second": 17.427,
1689
+ "eval_steps_per_second": 17.427,
1690
+ "step": 224
1691
  }
1692
  ],
1693
  "logging_steps": 1,
 
1707
  "attributes": {}
1708
  }
1709
  },
1710
+ "total_flos": 7.512584016494592e+16,
1711
  "train_batch_size": 1,
1712
  "trial_name": null,
1713
  "trial_params": null