FredericFan commited on
Commit
d889bb3
·
verified ·
1 Parent(s): 8619794

Training in progress, step 11500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d578e67cefa0cbc87c4588c8c5826c14d9ecd5d9fbb6cb344342920af139033
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec779206cf54d6ec2f38c65a076b00fc932c67753a52bdb7f8e2048ec7b5aa72
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178bbcd230b3578cd203d8ae6c2add9e74bb7aeabb94d447a3f70919fc0e8241
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aa8d2bcba67d1c3e0e255eaa792d3f5d5d742e9420328ca489a27ffc5e297e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b125975f9fae8190f9270b46403bcee0bbce4a754ced1327bdd16ba81bfb9533
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cdc0a7876cc40e108078f019affd5e2fa19bd7432063cc097ebdc6e0c9f6bc0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:224b826add4bdc720485795d14278c83b0b542b6e36e3b8f6419da3fb4a5e74d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06badf22ccd7f837423b8b17ce3c2141862926d038c21fb5506d2929cbbb4d8f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08301527053117752,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
4
- "epoch": 0.88,
5
  "eval_steps": 500,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1723,6 +1723,84 @@
1723
  "eval_samples_per_second": 22.71,
1724
  "eval_steps_per_second": 5.678,
1725
  "step": 11000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1726
  }
1727
  ],
1728
  "logging_steps": 50,
@@ -1742,7 +1820,7 @@
1742
  "attributes": {}
1743
  }
1744
  },
1745
- "total_flos": 2.679414718464e+16,
1746
  "train_batch_size": 4,
1747
  "trial_name": null,
1748
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08293735980987549,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-11500",
4
+ "epoch": 0.92,
5
  "eval_steps": 500,
6
+ "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1723
  "eval_samples_per_second": 22.71,
1724
  "eval_steps_per_second": 5.678,
1725
  "step": 11000
1726
+ },
1727
+ {
1728
+ "epoch": 0.884,
1729
+ "grad_norm": 0.22617511451244354,
1730
+ "learning_rate": 1.67424e-05,
1731
+ "loss": 0.066,
1732
+ "step": 11050
1733
+ },
1734
+ {
1735
+ "epoch": 0.888,
1736
+ "grad_norm": 0.15583598613739014,
1737
+ "learning_rate": 1.66824e-05,
1738
+ "loss": 0.0663,
1739
+ "step": 11100
1740
+ },
1741
+ {
1742
+ "epoch": 0.892,
1743
+ "grad_norm": 0.06502597779035568,
1744
+ "learning_rate": 1.6622400000000002e-05,
1745
+ "loss": 0.0611,
1746
+ "step": 11150
1747
+ },
1748
+ {
1749
+ "epoch": 0.896,
1750
+ "grad_norm": 0.1777944415807724,
1751
+ "learning_rate": 1.6562400000000003e-05,
1752
+ "loss": 0.0618,
1753
+ "step": 11200
1754
+ },
1755
+ {
1756
+ "epoch": 0.9,
1757
+ "grad_norm": 0.14777645468711853,
1758
+ "learning_rate": 1.65024e-05,
1759
+ "loss": 0.0616,
1760
+ "step": 11250
1761
+ },
1762
+ {
1763
+ "epoch": 0.904,
1764
+ "grad_norm": 0.1988598108291626,
1765
+ "learning_rate": 1.64424e-05,
1766
+ "loss": 0.0683,
1767
+ "step": 11300
1768
+ },
1769
+ {
1770
+ "epoch": 0.908,
1771
+ "grad_norm": 0.16424456238746643,
1772
+ "learning_rate": 1.63824e-05,
1773
+ "loss": 0.0616,
1774
+ "step": 11350
1775
+ },
1776
+ {
1777
+ "epoch": 0.912,
1778
+ "grad_norm": 0.17275363206863403,
1779
+ "learning_rate": 1.63224e-05,
1780
+ "loss": 0.0603,
1781
+ "step": 11400
1782
+ },
1783
+ {
1784
+ "epoch": 0.916,
1785
+ "grad_norm": 0.1208202987909317,
1786
+ "learning_rate": 1.6262400000000002e-05,
1787
+ "loss": 0.0623,
1788
+ "step": 11450
1789
+ },
1790
+ {
1791
+ "epoch": 0.92,
1792
+ "grad_norm": 0.12853899598121643,
1793
+ "learning_rate": 1.62024e-05,
1794
+ "loss": 0.0597,
1795
+ "step": 11500
1796
+ },
1797
+ {
1798
+ "epoch": 0.92,
1799
+ "eval_loss": 0.08293735980987549,
1800
+ "eval_runtime": 88.063,
1801
+ "eval_samples_per_second": 22.711,
1802
+ "eval_steps_per_second": 5.678,
1803
+ "step": 11500
1804
  }
1805
  ],
1806
  "logging_steps": 50,
 
1820
  "attributes": {}
1821
  }
1822
  },
1823
+ "total_flos": 2.801206296576e+16,
1824
  "train_batch_size": 4,
1825
  "trial_name": null,
1826
  "trial_params": null