irishprancer committed on
Commit
2d397ad
·
verified ·
1 Parent(s): 7f1788e

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3af98a6bbc1b21157f06c9dadb62daa1e1d00adfe26d7090e693b71989087ae
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d04d5fa41fa6f39ff0e99d56a6427bf17f28fc57677e4371c961796044f5d2a3
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f582039ac055bd7a42eb85aa1505df7e0717c40124b85c35253f1f3e1f58f5db
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccaaf38526637635b1485eaad8168918d1ff4276d494189eacd7059d25f24082
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001785b24c383f1dc5e05ca97682558022e868af635239d8c60b6646c2c21747
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f11fbb1dc348e5245b6e67b90b53d052aa55ce6bbd45d7369c3c11528ee140
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e0ba921a18d46c557f13f21f43adf78f0e35b0b4cbde5268f7f2125015b3077
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8b759658b308282d06846d4dfda31388c652c687853c092da47be547d0736c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 71.73913043478261,
5
  "eval_steps": 150,
6
- "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1602,6 +1602,151 @@
1602
  "EMA_steps_per_second": 22.836,
1603
  "epoch": 71.73913043478261,
1604
  "step": 1650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1605
  }
1606
  ],
1607
  "logging_steps": 10,
@@ -1621,7 +1766,7 @@
1621
  "attributes": {}
1622
  }
1623
  },
1624
- "total_flos": 4.242701339976499e+16,
1625
  "train_batch_size": 4,
1626
  "trial_name": null,
1627
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 78.26086956521739,
5
  "eval_steps": 150,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1602
  "EMA_steps_per_second": 22.836,
1603
  "epoch": 71.73913043478261,
1604
  "step": 1650
1605
+ },
1606
+ {
1607
+ "epoch": 72.17391304347827,
1608
+ "grad_norm": 1.6176584959030151,
1609
+ "learning_rate": 1.4980098028538014e-05,
1610
+ "loss": 0.3276,
1611
+ "step": 1660
1612
+ },
1613
+ {
1614
+ "epoch": 72.6086956521739,
1615
+ "grad_norm": 2.0951242446899414,
1616
+ "learning_rate": 1.4979989662368391e-05,
1617
+ "loss": 0.2962,
1618
+ "step": 1670
1619
+ },
1620
+ {
1621
+ "epoch": 73.04347826086956,
1622
+ "grad_norm": 1.9010318517684937,
1623
+ "learning_rate": 1.4979878327806899e-05,
1624
+ "loss": 0.3096,
1625
+ "step": 1680
1626
+ },
1627
+ {
1628
+ "epoch": 73.47826086956522,
1629
+ "grad_norm": 1.990721344947815,
1630
+ "learning_rate": 1.4979764024897668e-05,
1631
+ "loss": 0.2877,
1632
+ "step": 1690
1633
+ },
1634
+ {
1635
+ "epoch": 73.91304347826087,
1636
+ "grad_norm": 1.8217382431030273,
1637
+ "learning_rate": 1.4979646753686002e-05,
1638
+ "loss": 0.2797,
1639
+ "step": 1700
1640
+ },
1641
+ {
1642
+ "epoch": 74.34782608695652,
1643
+ "grad_norm": 1.3920949697494507,
1644
+ "learning_rate": 1.4979526514218385e-05,
1645
+ "loss": 0.277,
1646
+ "step": 1710
1647
+ },
1648
+ {
1649
+ "epoch": 74.78260869565217,
1650
+ "grad_norm": 1.4952901601791382,
1651
+ "learning_rate": 1.4979403306542473e-05,
1652
+ "loss": 0.3281,
1653
+ "step": 1720
1654
+ },
1655
+ {
1656
+ "epoch": 75.21739130434783,
1657
+ "grad_norm": 1.6056287288665771,
1658
+ "learning_rate": 1.4979277130707107e-05,
1659
+ "loss": 0.234,
1660
+ "step": 1730
1661
+ },
1662
+ {
1663
+ "epoch": 75.65217391304348,
1664
+ "grad_norm": 1.6877388954162598,
1665
+ "learning_rate": 1.4979147986762295e-05,
1666
+ "loss": 0.3147,
1667
+ "step": 1740
1668
+ },
1669
+ {
1670
+ "epoch": 76.08695652173913,
1671
+ "grad_norm": 1.7907490730285645,
1672
+ "learning_rate": 1.4979015874759227e-05,
1673
+ "loss": 0.2696,
1674
+ "step": 1750
1675
+ },
1676
+ {
1677
+ "epoch": 76.52173913043478,
1678
+ "grad_norm": 1.866333246231079,
1679
+ "learning_rate": 1.4978880794750266e-05,
1680
+ "loss": 0.2663,
1681
+ "step": 1760
1682
+ },
1683
+ {
1684
+ "epoch": 76.95652173913044,
1685
+ "grad_norm": 1.275960087776184,
1686
+ "learning_rate": 1.4978742746788957e-05,
1687
+ "loss": 0.3004,
1688
+ "step": 1770
1689
+ },
1690
+ {
1691
+ "epoch": 77.3913043478261,
1692
+ "grad_norm": 1.8372234106063843,
1693
+ "learning_rate": 1.4978601730930014e-05,
1694
+ "loss": 0.2842,
1695
+ "step": 1780
1696
+ },
1697
+ {
1698
+ "epoch": 77.82608695652173,
1699
+ "grad_norm": 1.8203933238983154,
1700
+ "learning_rate": 1.4978457747229335e-05,
1701
+ "loss": 0.2714,
1702
+ "step": 1790
1703
+ },
1704
+ {
1705
+ "epoch": 78.26086956521739,
1706
+ "grad_norm": 1.8666887283325195,
1707
+ "learning_rate": 1.497831079574399e-05,
1708
+ "loss": 0.3054,
1709
+ "step": 1800
1710
+ },
1711
+ {
1712
+ "epoch": 78.26086956521739,
1713
+ "eval_loss": 0.8706566691398621,
1714
+ "eval_runtime": 0.4194,
1715
+ "eval_samples_per_second": 23.844,
1716
+ "eval_steps_per_second": 23.844,
1717
+ "step": 1800
1718
+ },
1719
+ {
1720
+ "Start_State_loss": 0.861186683177948,
1721
+ "Start_State_runtime": 0.4248,
1722
+ "Start_State_samples_per_second": 23.542,
1723
+ "Start_State_steps_per_second": 23.542,
1724
+ "epoch": 78.26086956521739,
1725
+ "step": 1800
1726
+ },
1727
+ {
1728
+ "Raw_Model_loss": 0.8706566691398621,
1729
+ "Raw_Model_runtime": 0.4195,
1730
+ "Raw_Model_samples_per_second": 23.839,
1731
+ "Raw_Model_steps_per_second": 23.839,
1732
+ "epoch": 78.26086956521739,
1733
+ "step": 1800
1734
+ },
1735
+ {
1736
+ "SWA_loss": 0.7383162379264832,
1737
+ "SWA_runtime": 0.4011,
1738
+ "SWA_samples_per_second": 24.93,
1739
+ "SWA_steps_per_second": 24.93,
1740
+ "epoch": 78.26086956521739,
1741
+ "step": 1800
1742
+ },
1743
+ {
1744
+ "EMA_loss": 0.8612034916877747,
1745
+ "EMA_runtime": 0.4073,
1746
+ "EMA_samples_per_second": 24.55,
1747
+ "EMA_steps_per_second": 24.55,
1748
+ "epoch": 78.26086956521739,
1749
+ "step": 1800
1750
  }
1751
  ],
1752
  "logging_steps": 10,
 
1766
  "attributes": {}
1767
  }
1768
  },
1769
+ "total_flos": 4.631084552967782e+16,
1770
  "train_batch_size": 4,
1771
  "trial_name": null,
1772
  "trial_params": null