irishprancer commited on
Commit
9a99c91
·
verified ·
1 Parent(s): 2bf0b0d

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e34a2741903c7131a591ab1ea100bd60a9f729205b2327f5d897a43dd1a350df
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24b0844f48802856ec7fd062b5709ea322f8eeb94234eec4d37c4a7382b9a77d
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8b868ab44d14f751a290f2fb9a43b0004c429bf63a62d6da5cdde1046626611
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d2ef26f0dbfc4a6f2ca1147a92ec246182a3349e536111f87633feba51cb62
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001785b24c383f1dc5e05ca97682558022e868af635239d8c60b6646c2c21747
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f11fbb1dc348e5245b6e67b90b53d052aa55ce6bbd45d7369c3c11528ee140
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e0ba921a18d46c557f13f21f43adf78f0e35b0b4cbde5268f7f2125015b3077
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8b759658b308282d06846d4dfda31388c652c687853c092da47be547d0736c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166430950164795,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 71.73913043478261,
5
  "eval_steps": 150,
6
- "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1602,6 +1602,151 @@
1602
  "EMA_steps_per_second": 25.662,
1603
  "epoch": 71.73913043478261,
1604
  "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1605
  }
1606
  ],
1607
  "logging_steps": 10,
@@ -1621,7 +1766,7 @@
1621
  "attributes": {}
1622
  }
1623
  },
1624
- "total_flos": 4.242701339976499e+16,
1625
  "train_batch_size": 4,
1626
  "trial_name": null,
1627
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166430950164795,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 78.26086956521739,
5
  "eval_steps": 150,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1602
  "EMA_steps_per_second": 25.662,
1603
  "epoch": 71.73913043478261,
1604
  "step": 1650
1605
+ },
1606
+ {
1607
+ "epoch": 72.17391304347827,
1608
+ "grad_norm": 1.664617896080017,
1609
+ "learning_rate": 1.4980098028538014e-05,
1610
+ "loss": 0.3276,
1611
+ "step": 1660
1612
+ },
1613
+ {
1614
+ "epoch": 72.6086956521739,
1615
+ "grad_norm": 2.0435194969177246,
1616
+ "learning_rate": 1.4979989662368391e-05,
1617
+ "loss": 0.2963,
1618
+ "step": 1670
1619
+ },
1620
+ {
1621
+ "epoch": 73.04347826086956,
1622
+ "grad_norm": 1.906278133392334,
1623
+ "learning_rate": 1.4979878327806899e-05,
1624
+ "loss": 0.3093,
1625
+ "step": 1680
1626
+ },
1627
+ {
1628
+ "epoch": 73.47826086956522,
1629
+ "grad_norm": 2.026448965072632,
1630
+ "learning_rate": 1.4979764024897668e-05,
1631
+ "loss": 0.2875,
1632
+ "step": 1690
1633
+ },
1634
+ {
1635
+ "epoch": 73.91304347826087,
1636
+ "grad_norm": 1.8200604915618896,
1637
+ "learning_rate": 1.4979646753686002e-05,
1638
+ "loss": 0.2793,
1639
+ "step": 1700
1640
+ },
1641
+ {
1642
+ "epoch": 74.34782608695652,
1643
+ "grad_norm": 1.414810061454773,
1644
+ "learning_rate": 1.4979526514218385e-05,
1645
+ "loss": 0.277,
1646
+ "step": 1710
1647
+ },
1648
+ {
1649
+ "epoch": 74.78260869565217,
1650
+ "grad_norm": 1.4874234199523926,
1651
+ "learning_rate": 1.4979403306542473e-05,
1652
+ "loss": 0.3277,
1653
+ "step": 1720
1654
+ },
1655
+ {
1656
+ "epoch": 75.21739130434783,
1657
+ "grad_norm": 1.5648179054260254,
1658
+ "learning_rate": 1.4979277130707107e-05,
1659
+ "loss": 0.2337,
1660
+ "step": 1730
1661
+ },
1662
+ {
1663
+ "epoch": 75.65217391304348,
1664
+ "grad_norm": 1.6863374710083008,
1665
+ "learning_rate": 1.4979147986762295e-05,
1666
+ "loss": 0.3146,
1667
+ "step": 1740
1668
+ },
1669
+ {
1670
+ "epoch": 76.08695652173913,
1671
+ "grad_norm": 1.7994861602783203,
1672
+ "learning_rate": 1.4979015874759227e-05,
1673
+ "loss": 0.2694,
1674
+ "step": 1750
1675
+ },
1676
+ {
1677
+ "epoch": 76.52173913043478,
1678
+ "grad_norm": 1.8553599119186401,
1679
+ "learning_rate": 1.4978880794750266e-05,
1680
+ "loss": 0.2661,
1681
+ "step": 1760
1682
+ },
1683
+ {
1684
+ "epoch": 76.95652173913044,
1685
+ "grad_norm": 1.3038052320480347,
1686
+ "learning_rate": 1.4978742746788957e-05,
1687
+ "loss": 0.3005,
1688
+ "step": 1770
1689
+ },
1690
+ {
1691
+ "epoch": 77.3913043478261,
1692
+ "grad_norm": 1.8376268148422241,
1693
+ "learning_rate": 1.4978601730930014e-05,
1694
+ "loss": 0.2843,
1695
+ "step": 1780
1696
+ },
1697
+ {
1698
+ "epoch": 77.82608695652173,
1699
+ "grad_norm": 1.8291127681732178,
1700
+ "learning_rate": 1.4978457747229335e-05,
1701
+ "loss": 0.2715,
1702
+ "step": 1790
1703
+ },
1704
+ {
1705
+ "epoch": 78.26086956521739,
1706
+ "grad_norm": 1.933289885520935,
1707
+ "learning_rate": 1.497831079574399e-05,
1708
+ "loss": 0.3055,
1709
+ "step": 1800
1710
+ },
1711
+ {
1712
+ "epoch": 78.26086956521739,
1713
+ "eval_loss": 0.8690454363822937,
1714
+ "eval_runtime": 0.4165,
1715
+ "eval_samples_per_second": 24.008,
1716
+ "eval_steps_per_second": 24.008,
1717
+ "step": 1800
1718
+ },
1719
+ {
1720
+ "Start_State_loss": 0.8601926565170288,
1721
+ "Start_State_runtime": 0.3983,
1722
+ "Start_State_samples_per_second": 25.109,
1723
+ "Start_State_steps_per_second": 25.109,
1724
+ "epoch": 78.26086956521739,
1725
+ "step": 1800
1726
+ },
1727
+ {
1728
+ "Raw_Model_loss": 0.8690454363822937,
1729
+ "Raw_Model_runtime": 0.4147,
1730
+ "Raw_Model_samples_per_second": 24.115,
1731
+ "Raw_Model_steps_per_second": 24.115,
1732
+ "epoch": 78.26086956521739,
1733
+ "step": 1800
1734
+ },
1735
+ {
1736
+ "SWA_loss": 0.7372413873672485,
1737
+ "SWA_runtime": 0.4087,
1738
+ "SWA_samples_per_second": 24.465,
1739
+ "SWA_steps_per_second": 24.465,
1740
+ "epoch": 78.26086956521739,
1741
+ "step": 1800
1742
+ },
1743
+ {
1744
+ "EMA_loss": 0.8606707453727722,
1745
+ "EMA_runtime": 0.4092,
1746
+ "EMA_samples_per_second": 24.44,
1747
+ "EMA_steps_per_second": 24.44,
1748
+ "epoch": 78.26086956521739,
1749
+ "step": 1800
1750
  }
1751
  ],
1752
  "logging_steps": 10,
 
1766
  "attributes": {}
1767
  }
1768
  },
1769
+ "total_flos": 4.631084552967782e+16,
1770
  "train_batch_size": 4,
1771
  "trial_name": null,
1772
  "trial_params": null