FredericFan commited on
Commit
917f41d
·
verified ·
1 Parent(s): 4d4cbfa

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec779206cf54d6ec2f38c65a076b00fc932c67753a52bdb7f8e2048ec7b5aa72
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6989d08304817f7eda2a41817fe6fdf1039613dcc5ce63bc28b05cebb5b0a729
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4aa8d2bcba67d1c3e0e255eaa792d3f5d5d742e9420328ca489a27ffc5e297e
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef2622b356cc24fc03d43081c989231e41a9f98afae1477feeac80752fcbe27
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cdc0a7876cc40e108078f019affd5e2fa19bd7432063cc097ebdc6e0c9f6bc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c75df0b33157e12f11389fab2a782cb08091622aab636059c475b184fac12c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06badf22ccd7f837423b8b17ce3c2141862926d038c21fb5506d2929cbbb4d8f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba896b8260293f6060945e9d4b6d72d879757324e56407782c7dd941c44937b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08293735980987549,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-11500",
4
- "epoch": 0.92,
5
  "eval_steps": 500,
6
- "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1801,6 +1801,84 @@
1801
  "eval_samples_per_second": 22.711,
1802
  "eval_steps_per_second": 5.678,
1803
  "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1804
  }
1805
  ],
1806
  "logging_steps": 50,
@@ -1820,7 +1898,7 @@
1820
  "attributes": {}
1821
  }
1822
  },
1823
- "total_flos": 2.801206296576e+16,
1824
  "train_batch_size": 4,
1825
  "trial_name": null,
1826
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.082674041390419,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-12000",
4
+ "epoch": 0.96,
5
  "eval_steps": 500,
6
+ "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1801
  "eval_samples_per_second": 22.711,
1802
  "eval_steps_per_second": 5.678,
1803
  "step": 11500
1804
+ },
1805
+ {
1806
+ "epoch": 0.924,
1807
+ "grad_norm": 0.12364531308412552,
1808
+ "learning_rate": 1.61424e-05,
1809
+ "loss": 0.0624,
1810
+ "step": 11550
1811
+ },
1812
+ {
1813
+ "epoch": 0.928,
1814
+ "grad_norm": 0.15913207828998566,
1815
+ "learning_rate": 1.60824e-05,
1816
+ "loss": 0.061,
1817
+ "step": 11600
1818
+ },
1819
+ {
1820
+ "epoch": 0.932,
1821
+ "grad_norm": 0.16585472226142883,
1822
+ "learning_rate": 1.60224e-05,
1823
+ "loss": 0.066,
1824
+ "step": 11650
1825
+ },
1826
+ {
1827
+ "epoch": 0.936,
1828
+ "grad_norm": 0.13831599056720734,
1829
+ "learning_rate": 1.59624e-05,
1830
+ "loss": 0.0578,
1831
+ "step": 11700
1832
+ },
1833
+ {
1834
+ "epoch": 0.94,
1835
+ "grad_norm": 0.1819075047969818,
1836
+ "learning_rate": 1.59024e-05,
1837
+ "loss": 0.0593,
1838
+ "step": 11750
1839
+ },
1840
+ {
1841
+ "epoch": 0.944,
1842
+ "grad_norm": 0.16260235011577606,
1843
+ "learning_rate": 1.58424e-05,
1844
+ "loss": 0.0647,
1845
+ "step": 11800
1846
+ },
1847
+ {
1848
+ "epoch": 0.948,
1849
+ "grad_norm": 0.1420051008462906,
1850
+ "learning_rate": 1.57824e-05,
1851
+ "loss": 0.0556,
1852
+ "step": 11850
1853
+ },
1854
+ {
1855
+ "epoch": 0.952,
1856
+ "grad_norm": 0.17357395589351654,
1857
+ "learning_rate": 1.57224e-05,
1858
+ "loss": 0.0585,
1859
+ "step": 11900
1860
+ },
1861
+ {
1862
+ "epoch": 0.956,
1863
+ "grad_norm": 0.1465808004140854,
1864
+ "learning_rate": 1.56624e-05,
1865
+ "loss": 0.0633,
1866
+ "step": 11950
1867
+ },
1868
+ {
1869
+ "epoch": 0.96,
1870
+ "grad_norm": 0.18425939977169037,
1871
+ "learning_rate": 1.56024e-05,
1872
+ "loss": 0.061,
1873
+ "step": 12000
1874
+ },
1875
+ {
1876
+ "epoch": 0.96,
1877
+ "eval_loss": 0.082674041390419,
1878
+ "eval_runtime": 88.0509,
1879
+ "eval_samples_per_second": 22.714,
1880
+ "eval_steps_per_second": 5.679,
1881
+ "step": 12000
1882
  }
1883
  ],
1884
  "logging_steps": 50,
 
1898
  "attributes": {}
1899
  }
1900
  },
1901
+ "total_flos": 2.922997874688e+16,
1902
  "train_batch_size": 4,
1903
  "trial_name": null,
1904
  "trial_params": null