NairaRahim commited on
Commit
1fe9b8d
·
verified ·
1 Parent(s): 0173d28

Training in progress, epoch 18, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c4ff7c723a1e608d989b96e44d892070778ec83c952e55c57dddf3c3f48178
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:984c4064387b5d2acb50bbb73550ab8e59284bb7304c8ae2481fd6b52ff38e6a
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d0a831ac586fb1be369d61d6f0cc41522b60258bcdd25b76e42085747f41185
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69dce8d19e012dd3ce3e9993c52659d81190417ccecc2e0abd73083314d194c9
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46332136f176afc146e73eb2e78a93d7beb2c41f4d0c62f6c39855d4ba7b1979
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b8fc98da5c711399560346c0f91ffc2f6dec6609f8c3720258169ba848b497
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:855574b974c7e1ac9f8ed715f000f5b33d3a42b4a2102f47eec78477a0831f87
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b8e678712001404d086acc835dbbf2d80efd02cf92f10187d53131141ee5f90
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
- "epoch": 17.0,
5
  "eval_steps": 500,
6
- "global_step": 22185,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1690,6 +1690,105 @@
1690
  "eval_samples_per_second": 26.464,
1691
  "eval_steps_per_second": 3.326,
1692
  "step": 22185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1693
  }
1694
  ],
1695
  "logging_steps": 100,
@@ -1704,7 +1803,7 @@
1704
  "early_stopping_threshold": 0.0
1705
  },
1706
  "attributes": {
1707
- "early_stopping_patience_counter": 1
1708
  }
1709
  },
1710
  "TrainerControl": {
@@ -1718,7 +1817,7 @@
1718
  "attributes": {}
1719
  }
1720
  },
1721
- "total_flos": 2.3924285351801856e+16,
1722
  "train_batch_size": 8,
1723
  "trial_name": null,
1724
  "trial_params": null
 
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
+ "epoch": 18.0,
5
  "eval_steps": 500,
6
+ "global_step": 23490,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1690
  "eval_samples_per_second": 26.464,
1691
  "eval_steps_per_second": 3.326,
1692
  "step": 22185
1693
+ },
1694
+ {
1695
+ "epoch": 17.011494252873565,
1696
+ "grad_norm": 4.7622528076171875,
1697
+ "learning_rate": 3.937260536398468e-05,
1698
+ "loss": 34.3268,
1699
+ "step": 22200
1700
+ },
1701
+ {
1702
+ "epoch": 17.088122605363985,
1703
+ "grad_norm": 2.9908533096313477,
1704
+ "learning_rate": 3.9324712643678164e-05,
1705
+ "loss": 33.4477,
1706
+ "step": 22300
1707
+ },
1708
+ {
1709
+ "epoch": 17.164750957854405,
1710
+ "grad_norm": 2.2341110706329346,
1711
+ "learning_rate": 3.927681992337165e-05,
1712
+ "loss": 33.6793,
1713
+ "step": 22400
1714
+ },
1715
+ {
1716
+ "epoch": 17.24137931034483,
1717
+ "grad_norm": 2.3946852684020996,
1718
+ "learning_rate": 3.922892720306514e-05,
1719
+ "loss": 33.2578,
1720
+ "step": 22500
1721
+ },
1722
+ {
1723
+ "epoch": 17.31800766283525,
1724
+ "grad_norm": 3.3899614810943604,
1725
+ "learning_rate": 3.9181034482758625e-05,
1726
+ "loss": 33.2486,
1727
+ "step": 22600
1728
+ },
1729
+ {
1730
+ "epoch": 17.39463601532567,
1731
+ "grad_norm": 5.150006294250488,
1732
+ "learning_rate": 3.9133141762452105e-05,
1733
+ "loss": 33.0265,
1734
+ "step": 22700
1735
+ },
1736
+ {
1737
+ "epoch": 17.47126436781609,
1738
+ "grad_norm": 2.8135523796081543,
1739
+ "learning_rate": 3.908524904214559e-05,
1740
+ "loss": 33.4384,
1741
+ "step": 22800
1742
+ },
1743
+ {
1744
+ "epoch": 17.547892720306514,
1745
+ "grad_norm": 2.5454325675964355,
1746
+ "learning_rate": 3.903735632183908e-05,
1747
+ "loss": 33.4139,
1748
+ "step": 22900
1749
+ },
1750
+ {
1751
+ "epoch": 17.624521072796934,
1752
+ "grad_norm": 4.680717945098877,
1753
+ "learning_rate": 3.8989463601532566e-05,
1754
+ "loss": 34.0209,
1755
+ "step": 23000
1756
+ },
1757
+ {
1758
+ "epoch": 17.701149425287355,
1759
+ "grad_norm": 4.242103099822998,
1760
+ "learning_rate": 3.894157088122606e-05,
1761
+ "loss": 33.1372,
1762
+ "step": 23100
1763
+ },
1764
+ {
1765
+ "epoch": 17.77777777777778,
1766
+ "grad_norm": 2.639352798461914,
1767
+ "learning_rate": 3.8893678160919546e-05,
1768
+ "loss": 33.3558,
1769
+ "step": 23200
1770
+ },
1771
+ {
1772
+ "epoch": 17.8544061302682,
1773
+ "grad_norm": 1.9746617078781128,
1774
+ "learning_rate": 3.884578544061303e-05,
1775
+ "loss": 33.7639,
1776
+ "step": 23300
1777
+ },
1778
+ {
1779
+ "epoch": 17.93103448275862,
1780
+ "grad_norm": 4.005228519439697,
1781
+ "learning_rate": 3.879837164750958e-05,
1782
+ "loss": 33.0241,
1783
+ "step": 23400
1784
+ },
1785
+ {
1786
+ "epoch": 18.0,
1787
+ "eval_loss": 34.649261474609375,
1788
+ "eval_runtime": 49.2606,
1789
+ "eval_samples_per_second": 26.492,
1790
+ "eval_steps_per_second": 3.329,
1791
+ "step": 23490
1792
  }
1793
  ],
1794
  "logging_steps": 100,
 
1803
  "early_stopping_threshold": 0.0
1804
  },
1805
  "attributes": {
1806
+ "early_stopping_patience_counter": 2
1807
  }
1808
  },
1809
  "TrainerControl": {
 
1817
  "attributes": {}
1818
  }
1819
  },
1820
+ "total_flos": 2.5331596254849024e+16,
1821
  "train_batch_size": 8,
1822
  "trial_name": null,
1823
  "trial_params": null