CocoRoF commited on
Commit
be29b9d
·
verified ·
1 Parent(s): 963ab98

Training in progress, step 2134, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:784ba4d6b323f80b32bdeb7a699b9493ed33877eb1f5a4680739df590c874265
3
  size 735217848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10f2f471ddb68aeb84eaedafdfdcf37d93db8b63aabad73c90654c0ff2b5c6c
3
  size 735217848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:463d10606216eaac5990abb66ecffc932b652d3d4835e35f52c38d3e543733ff
3
  size 1470521978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:786bac647cdfb3e95caf79a33fc77addeb1581415ccb52e3e6a59e8aa9baf708
3
  size 1470521978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0886b5e6b4eb6c54d008834760837138a75d96ac8156628b1654cc847af0e990
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e929bfb8d61dabc9ff9440d99be02b793be97dcf206c259cdc957e3702b21cb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd992b247e47b39b5cc00365846a4ed7e75fe3dd899b2263e283f44789d5c49b
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0470658ec86377919cf99d26abf4d40a040955151803301b44a6b653da17e99d
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1727,6 +1727,113 @@
1727
  "eval_spearman_manhattan": 0.8295593456330026,
1728
  "eval_steps_per_second": 15.61,
1729
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1730
  }
1731
  ],
1732
  "logging_steps": 10,
@@ -1741,7 +1848,7 @@
1741
  "should_evaluate": false,
1742
  "should_log": false,
1743
  "should_save": true,
1744
- "should_training_stop": false
1745
  },
1746
  "attributes": {}
1747
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 100,
6
+ "global_step": 2134,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1727
  "eval_spearman_manhattan": 0.8295593456330026,
1728
  "eval_steps_per_second": 15.61,
1729
  "step": 2000
1730
+ },
1731
+ {
1732
+ "epoch": 0.9418931583880038,
1733
+ "grad_norm": 1.1670805215835571,
1734
+ "learning_rate": 4.411316776007498e-05,
1735
+ "loss": 0.2544,
1736
+ "step": 2010
1737
+ },
1738
+ {
1739
+ "epoch": 0.9465791940018744,
1740
+ "grad_norm": 1.6743440628051758,
1741
+ "learning_rate": 4.408388003748829e-05,
1742
+ "loss": 0.2866,
1743
+ "step": 2020
1744
+ },
1745
+ {
1746
+ "epoch": 0.9512652296157451,
1747
+ "grad_norm": 1.5439425706863403,
1748
+ "learning_rate": 4.4054592314901596e-05,
1749
+ "loss": 0.286,
1750
+ "step": 2030
1751
+ },
1752
+ {
1753
+ "epoch": 0.9559512652296157,
1754
+ "grad_norm": 1.317328691482544,
1755
+ "learning_rate": 4.4025304592314905e-05,
1756
+ "loss": 0.2674,
1757
+ "step": 2040
1758
+ },
1759
+ {
1760
+ "epoch": 0.9606373008434864,
1761
+ "grad_norm": 1.3477058410644531,
1762
+ "learning_rate": 4.399601686972821e-05,
1763
+ "loss": 0.3005,
1764
+ "step": 2050
1765
+ },
1766
+ {
1767
+ "epoch": 0.9653233364573571,
1768
+ "grad_norm": 1.8991141319274902,
1769
+ "learning_rate": 4.396672914714152e-05,
1770
+ "loss": 0.2924,
1771
+ "step": 2060
1772
+ },
1773
+ {
1774
+ "epoch": 0.9700093720712277,
1775
+ "grad_norm": 1.406160593032837,
1776
+ "learning_rate": 4.3937441424554824e-05,
1777
+ "loss": 0.2977,
1778
+ "step": 2070
1779
+ },
1780
+ {
1781
+ "epoch": 0.9746954076850984,
1782
+ "grad_norm": 1.7128149271011353,
1783
+ "learning_rate": 4.390815370196814e-05,
1784
+ "loss": 0.3003,
1785
+ "step": 2080
1786
+ },
1787
+ {
1788
+ "epoch": 0.979381443298969,
1789
+ "grad_norm": 1.2714091539382935,
1790
+ "learning_rate": 4.387886597938145e-05,
1791
+ "loss": 0.2807,
1792
+ "step": 2090
1793
+ },
1794
+ {
1795
+ "epoch": 0.9840674789128397,
1796
+ "grad_norm": 1.1022099256515503,
1797
+ "learning_rate": 4.3849578256794756e-05,
1798
+ "loss": 0.2889,
1799
+ "step": 2100
1800
+ },
1801
+ {
1802
+ "epoch": 0.9840674789128397,
1803
+ "eval_loss": 0.04628630727529526,
1804
+ "eval_pearson_cosine": 0.8264380630920911,
1805
+ "eval_pearson_dot": 0.765122231613347,
1806
+ "eval_pearson_euclidean": 0.8297712335935188,
1807
+ "eval_pearson_manhattan": 0.8303968843751832,
1808
+ "eval_runtime": 5.8458,
1809
+ "eval_samples_per_second": 256.595,
1810
+ "eval_spearman_cosine": 0.8280517554752953,
1811
+ "eval_spearman_dot": 0.7620029087019149,
1812
+ "eval_spearman_euclidean": 0.8362970969318975,
1813
+ "eval_spearman_manhattan": 0.8368418361210694,
1814
+ "eval_steps_per_second": 16.08,
1815
+ "step": 2100
1816
+ },
1817
+ {
1818
+ "epoch": 0.9887535145267105,
1819
+ "grad_norm": 1.5780407190322876,
1820
+ "learning_rate": 4.3820290534208064e-05,
1821
+ "loss": 0.3019,
1822
+ "step": 2110
1823
+ },
1824
+ {
1825
+ "epoch": 0.993439550140581,
1826
+ "grad_norm": 1.5804523229599,
1827
+ "learning_rate": 4.3791002811621366e-05,
1828
+ "loss": 0.254,
1829
+ "step": 2120
1830
+ },
1831
+ {
1832
+ "epoch": 0.9981255857544518,
1833
+ "grad_norm": 1.5754133462905884,
1834
+ "learning_rate": 4.3761715089034675e-05,
1835
+ "loss": 0.2948,
1836
+ "step": 2130
1837
  }
1838
  ],
1839
  "logging_steps": 10,
 
1848
  "should_evaluate": false,
1849
  "should_log": false,
1850
  "should_save": true,
1851
+ "should_training_stop": true
1852
  },
1853
  "attributes": {}
1854
  }