mgh6 commited on
Commit
db5c088
·
verified ·
1 Parent(s): b7ce5ce

Training in progress, epoch 17, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85a37ab425a5bb455eb5138be42e451b4f6504a4ec63d8c4323c617cb3ebc33e
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:659e1a6a1ff1a9fbec759df1d78ad027f1d391213c1a217bf5b92f48ccb3b2a7
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb0e7b73e702883e0aea72dd82fc77ae713a9e1e0ea8fae495ef88376c79291d
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d5e4710df7ed104bcaa0a9051033613a80b4363c6f76bae6c58a4863fa30ad
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f665ad4a75c89f6711748909d7b7bdbe353d41461e39d9efde1e7c8c122662
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acb524c16816d50e3851812f08ed31af0f04d2253498b52b5121962571f22c75
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8db07ed3922379a7896753ff18d75baed592f87392b70da4afc38e708f68e8df
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee6db23f5e321edc29e923809ce0b3dff2a73c49bba17a4de22c710250ea7d6e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.997999636297507,
5
  "eval_steps": 50,
6
- "global_step": 5831,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1747,6 +1747,111 @@
1747
  "eval_samples_per_second": 41.458,
1748
  "eval_steps_per_second": 20.729,
1749
  "step": 5800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1750
  }
1751
  ],
1752
  "logging_steps": 50,
@@ -1766,7 +1871,7 @@
1766
  "attributes": {}
1767
  }
1768
  },
1769
- "total_flos": 1.5187665302784573e+18,
1770
  "train_batch_size": 2,
1771
  "trial_name": null,
1772
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.997999636297507,
5
  "eval_steps": 50,
6
+ "global_step": 6174,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1747
  "eval_samples_per_second": 41.458,
1748
  "eval_steps_per_second": 20.729,
1749
  "step": 5800
1750
+ },
1751
+ {
1752
+ "epoch": 17.055282778687033,
1753
+ "grad_norm": 69.1272201538086,
1754
+ "learning_rate": 1.4723032069970846e-05,
1755
+ "loss": 0.686,
1756
+ "step": 5850
1757
+ },
1758
+ {
1759
+ "epoch": 17.055282778687033,
1760
+ "eval_loss": 0.662264883518219,
1761
+ "eval_runtime": 116.5098,
1762
+ "eval_samples_per_second": 41.421,
1763
+ "eval_steps_per_second": 20.711,
1764
+ "step": 5850
1765
+ },
1766
+ {
1767
+ "epoch": 17.20076377523186,
1768
+ "grad_norm": 46.41282653808594,
1769
+ "learning_rate": 1.3994169096209913e-05,
1770
+ "loss": 0.678,
1771
+ "step": 5900
1772
+ },
1773
+ {
1774
+ "epoch": 17.20076377523186,
1775
+ "eval_loss": 0.6592727899551392,
1776
+ "eval_runtime": 116.359,
1777
+ "eval_samples_per_second": 41.475,
1778
+ "eval_steps_per_second": 20.738,
1779
+ "step": 5900
1780
+ },
1781
+ {
1782
+ "epoch": 17.346244771776686,
1783
+ "grad_norm": 69.46429443359375,
1784
+ "learning_rate": 1.3265306122448982e-05,
1785
+ "loss": 0.6799,
1786
+ "step": 5950
1787
+ },
1788
+ {
1789
+ "epoch": 17.346244771776686,
1790
+ "eval_loss": 0.6556416153907776,
1791
+ "eval_runtime": 116.5416,
1792
+ "eval_samples_per_second": 41.41,
1793
+ "eval_steps_per_second": 20.705,
1794
+ "step": 5950
1795
+ },
1796
+ {
1797
+ "epoch": 17.491725768321512,
1798
+ "grad_norm": 53.518436431884766,
1799
+ "learning_rate": 1.2536443148688048e-05,
1800
+ "loss": 0.6783,
1801
+ "step": 6000
1802
+ },
1803
+ {
1804
+ "epoch": 17.491725768321512,
1805
+ "eval_loss": 0.6580107808113098,
1806
+ "eval_runtime": 116.1219,
1807
+ "eval_samples_per_second": 41.56,
1808
+ "eval_steps_per_second": 20.78,
1809
+ "step": 6000
1810
+ },
1811
+ {
1812
+ "epoch": 17.637206764866338,
1813
+ "grad_norm": 57.20518493652344,
1814
+ "learning_rate": 1.1807580174927114e-05,
1815
+ "loss": 0.6784,
1816
+ "step": 6050
1817
+ },
1818
+ {
1819
+ "epoch": 17.637206764866338,
1820
+ "eval_loss": 0.6599807143211365,
1821
+ "eval_runtime": 116.2815,
1822
+ "eval_samples_per_second": 41.503,
1823
+ "eval_steps_per_second": 20.751,
1824
+ "step": 6050
1825
+ },
1826
+ {
1827
+ "epoch": 17.782687761411164,
1828
+ "grad_norm": 49.93397521972656,
1829
+ "learning_rate": 1.1078717201166182e-05,
1830
+ "loss": 0.676,
1831
+ "step": 6100
1832
+ },
1833
+ {
1834
+ "epoch": 17.782687761411164,
1835
+ "eval_loss": 0.6560738682746887,
1836
+ "eval_runtime": 116.7097,
1837
+ "eval_samples_per_second": 41.35,
1838
+ "eval_steps_per_second": 20.675,
1839
+ "step": 6100
1840
+ },
1841
+ {
1842
+ "epoch": 17.92816875795599,
1843
+ "grad_norm": 48.3265495300293,
1844
+ "learning_rate": 1.0349854227405248e-05,
1845
+ "loss": 0.6778,
1846
+ "step": 6150
1847
+ },
1848
+ {
1849
+ "epoch": 17.92816875795599,
1850
+ "eval_loss": 0.6563200950622559,
1851
+ "eval_runtime": 116.4229,
1852
+ "eval_samples_per_second": 41.452,
1853
+ "eval_steps_per_second": 20.726,
1854
+ "step": 6150
1855
  }
1856
  ],
1857
  "logging_steps": 50,
 
1871
  "attributes": {}
1872
  }
1873
  },
1874
+ "total_flos": 1.6080269119827476e+18,
1875
  "train_batch_size": 2,
1876
  "trial_name": null,
1877
  "trial_params": null