CocoRoF commited on
Commit
542d160
·
verified ·
1 Parent(s): cd93d0b

Training in progress, step 2134, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4531cecc4e308a35dc907f81b5204f58f87403a6008584884dae9f8d5e3178ad
3
  size 735217848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d13135869408c362410b6d65a9ecf14fcbec80f6a15aee0cf299eff51f090af
3
  size 735217848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41adcafe7f4fb36e8df90cb2a36c6fc9005eadf99b272fd5a69c8b1c71c58878
3
  size 1470521978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a0b7a39de0aa8d9888d2f2710d48cfcf1e88b35f597dd289a68fd843ed1b1b
3
  size 1470521978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0886b5e6b4eb6c54d008834760837138a75d96ac8156628b1654cc847af0e990
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e929bfb8d61dabc9ff9440d99be02b793be97dcf206c259cdc957e3702b21cb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5146063efca63e5eea8f3db237a9d2214ab60b2b370c91142e2b5e596c8cc2ad
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d9e4b230e7ad00add7adfd0d7846e7a79fb156cfc157db8c226ab66e3110285
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1727,6 +1727,113 @@
1727
  "eval_spearman_manhattan": 0.7818782293686074,
1728
  "eval_steps_per_second": 19.865,
1729
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1730
  }
1731
  ],
1732
  "logging_steps": 10,
@@ -1741,7 +1848,7 @@
1741
  "should_evaluate": false,
1742
  "should_log": false,
1743
  "should_save": true,
1744
- "should_training_stop": false
1745
  },
1746
  "attributes": {}
1747
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 100,
6
+ "global_step": 2134,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1727
  "eval_spearman_manhattan": 0.7818782293686074,
1728
  "eval_steps_per_second": 19.865,
1729
  "step": 2000
1730
+ },
1731
+ {
1732
+ "epoch": 0.9418931583880038,
1733
+ "grad_norm": 1.3811417818069458,
1734
+ "learning_rate": 8.822633552014996e-05,
1735
+ "loss": 0.2759,
1736
+ "step": 2010
1737
+ },
1738
+ {
1739
+ "epoch": 0.9465791940018744,
1740
+ "grad_norm": 2.8094141483306885,
1741
+ "learning_rate": 8.816776007497658e-05,
1742
+ "loss": 0.3046,
1743
+ "step": 2020
1744
+ },
1745
+ {
1746
+ "epoch": 0.9512652296157451,
1747
+ "grad_norm": 1.8067703247070312,
1748
+ "learning_rate": 8.810918462980319e-05,
1749
+ "loss": 0.314,
1750
+ "step": 2030
1751
+ },
1752
+ {
1753
+ "epoch": 0.9559512652296157,
1754
+ "grad_norm": 1.9540753364562988,
1755
+ "learning_rate": 8.805060918462981e-05,
1756
+ "loss": 0.2965,
1757
+ "step": 2040
1758
+ },
1759
+ {
1760
+ "epoch": 0.9606373008434864,
1761
+ "grad_norm": 2.20998477935791,
1762
+ "learning_rate": 8.799203373945643e-05,
1763
+ "loss": 0.3278,
1764
+ "step": 2050
1765
+ },
1766
+ {
1767
+ "epoch": 0.9653233364573571,
1768
+ "grad_norm": 2.156224250793457,
1769
+ "learning_rate": 8.793345829428304e-05,
1770
+ "loss": 0.3202,
1771
+ "step": 2060
1772
+ },
1773
+ {
1774
+ "epoch": 0.9700093720712277,
1775
+ "grad_norm": 1.6349718570709229,
1776
+ "learning_rate": 8.787488284910965e-05,
1777
+ "loss": 0.3192,
1778
+ "step": 2070
1779
+ },
1780
+ {
1781
+ "epoch": 0.9746954076850984,
1782
+ "grad_norm": 1.7507764101028442,
1783
+ "learning_rate": 8.781630740393628e-05,
1784
+ "loss": 0.3148,
1785
+ "step": 2080
1786
+ },
1787
+ {
1788
+ "epoch": 0.979381443298969,
1789
+ "grad_norm": 1.6444741487503052,
1790
+ "learning_rate": 8.77577319587629e-05,
1791
+ "loss": 0.3009,
1792
+ "step": 2090
1793
+ },
1794
+ {
1795
+ "epoch": 0.9840674789128397,
1796
+ "grad_norm": 1.5341808795928955,
1797
+ "learning_rate": 8.769915651358951e-05,
1798
+ "loss": 0.3119,
1799
+ "step": 2100
1800
+ },
1801
+ {
1802
+ "epoch": 0.9840674789128397,
1803
+ "eval_loss": 0.04616248607635498,
1804
+ "eval_pearson_cosine": 0.8028400286804569,
1805
+ "eval_pearson_dot": 0.8142363636471543,
1806
+ "eval_pearson_euclidean": 0.7825157658447595,
1807
+ "eval_pearson_manhattan": 0.7827871893341012,
1808
+ "eval_runtime": 4.9143,
1809
+ "eval_samples_per_second": 305.23,
1810
+ "eval_spearman_cosine": 0.805895283206979,
1811
+ "eval_spearman_dot": 0.8167650665649905,
1812
+ "eval_spearman_euclidean": 0.7880766458177777,
1813
+ "eval_spearman_manhattan": 0.7880451603002018,
1814
+ "eval_steps_per_second": 19.128,
1815
+ "step": 2100
1816
+ },
1817
+ {
1818
+ "epoch": 0.9887535145267105,
1819
+ "grad_norm": 2.1978578567504883,
1820
+ "learning_rate": 8.764058106841613e-05,
1821
+ "loss": 0.3473,
1822
+ "step": 2110
1823
+ },
1824
+ {
1825
+ "epoch": 0.993439550140581,
1826
+ "grad_norm": 1.6111299991607666,
1827
+ "learning_rate": 8.758200562324273e-05,
1828
+ "loss": 0.2743,
1829
+ "step": 2120
1830
+ },
1831
+ {
1832
+ "epoch": 0.9981255857544518,
1833
+ "grad_norm": 2.0017919540405273,
1834
+ "learning_rate": 8.752343017806935e-05,
1835
+ "loss": 0.3201,
1836
+ "step": 2130
1837
  }
1838
  ],
1839
  "logging_steps": 10,
 
1848
  "should_evaluate": false,
1849
  "should_log": false,
1850
  "should_save": true,
1851
+ "should_training_stop": true
1852
  },
1853
  "attributes": {}
1854
  }