FormlessAI commited on
Commit
aef6437
·
verified ·
1 Parent(s): 3b50fd4

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44441ec494dd1eeddb4f4b1f003d97643c00cca698aa10a2254f4b4bdacb8704
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0762ec721b93d1a0e10ada578c7538ccb87f010928b297b4505a645b3aec697
3
  size 98088784
last-checkpoint/global_step1150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa1fd94ec52151cc8ac9118abed11c7a3e7a5973cd11ebe89a119398b424d040
3
+ size 73939813
last-checkpoint/global_step1150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28d9686a61da27a86d8432e4c1c6f8448d185febb9dc62e63da04f25aef4400
3
+ size 73939813
last-checkpoint/global_step1150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae989bbd465e0ea99a6c57870c991c867715b51aa2fce0ea3fb262bedbf097a
3
+ size 73939877
last-checkpoint/global_step1150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6576ebf655c7cbdf4585f340cae9ee67e706b6193cc4a03c863634805807ac
3
+ size 73939877
last-checkpoint/global_step1150/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9590c7cd283fe8a45bd69e18382a39acaeba5bc967eccf83e04a9c12c1af5ea8
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1100
 
1
+ global_step1150
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ceb0ba0fefc4682de8ae9d502be348b266aef51d2c517ea10d576e3957cf16e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e2c783f044e208693875b6618820b4692ab8369227ed5fcfe75de8c98cb2f5
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7f0589f852327dcbb3a04372c5c9b3b3aed87183a18e4e78c8842af6ccc94ea
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a009ec584589b323bfde6fb332132397a948a68665dbf47ae6b13108a76ac8
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f4aa42ba29fbaf89d327737bbdbe96fa7085e909f789a4b592724ea39fd0491
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eba47f1f3f2aaeb1ee30212c3d28966395e9b15ce04d718f220251a1b885544
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ed1a940d9e87126bc4746d90070268ad6d65dcc8b4794a5c83d93738db2dc6b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:969e35a2eee24aa5d0640e276157b14ed3586e426e68f6139c80b9bdb3012f62
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41aec0a0f7fd8e266c974eb692fe1a8c668e3b6745d80b43c921e581b091927b
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f599b3f2fdaee9f298de483bc342667a86479cffdd08dfb05aebfb998561b471
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6880703568458557,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0783833435394978,
6
  "eval_steps": 50,
7
- "global_step": 1100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1724,6 +1724,84 @@
1724
  "eval_samples_per_second": 125.981,
1725
  "eval_steps_per_second": 15.756,
1726
  "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1727
  }
1728
  ],
1729
  "logging_steps": 5,
@@ -1752,7 +1830,7 @@
1752
  "attributes": {}
1753
  }
1754
  },
1755
- "total_flos": 5.6795728734846976e+17,
1756
  "train_batch_size": 2,
1757
  "trial_name": null,
1758
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6847204566001892,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.127372933251684,
6
  "eval_steps": 50,
7
+ "global_step": 1150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1724
  "eval_samples_per_second": 125.981,
1725
  "eval_steps_per_second": 15.756,
1726
  "step": 1100
1727
+ },
1728
+ {
1729
+ "epoch": 1.0832823025107166,
1730
+ "grad_norm": 0.2049368917942047,
1731
+ "learning_rate": 9.97107409377544e-05,
1732
+ "loss": 0.7052,
1733
+ "step": 1105
1734
+ },
1735
+ {
1736
+ "epoch": 1.088181261481935,
1737
+ "grad_norm": 0.2253541499376297,
1738
+ "learning_rate": 9.960392926073467e-05,
1739
+ "loss": 0.7028,
1740
+ "step": 1110
1741
+ },
1742
+ {
1743
+ "epoch": 1.0930802204531538,
1744
+ "grad_norm": 0.2347995936870575,
1745
+ "learning_rate": 9.949666330595961e-05,
1746
+ "loss": 0.7055,
1747
+ "step": 1115
1748
+ },
1749
+ {
1750
+ "epoch": 1.0979791794243723,
1751
+ "grad_norm": 0.21330611407756805,
1752
+ "learning_rate": 9.938894417575287e-05,
1753
+ "loss": 0.7326,
1754
+ "step": 1120
1755
+ },
1756
+ {
1757
+ "epoch": 1.102878138395591,
1758
+ "grad_norm": 0.20777581632137299,
1759
+ "learning_rate": 9.928077297709514e-05,
1760
+ "loss": 0.7198,
1761
+ "step": 1125
1762
+ },
1763
+ {
1764
+ "epoch": 1.1077770973668095,
1765
+ "grad_norm": 0.22546184062957764,
1766
+ "learning_rate": 9.91721508216129e-05,
1767
+ "loss": 0.6848,
1768
+ "step": 1130
1769
+ },
1770
+ {
1771
+ "epoch": 1.1126760563380282,
1772
+ "grad_norm": 0.22367283701896667,
1773
+ "learning_rate": 9.90630788255668e-05,
1774
+ "loss": 0.7067,
1775
+ "step": 1135
1776
+ },
1777
+ {
1778
+ "epoch": 1.1175750153092467,
1779
+ "grad_norm": 0.2060408741235733,
1780
+ "learning_rate": 9.895355810984042e-05,
1781
+ "loss": 0.7032,
1782
+ "step": 1140
1783
+ },
1784
+ {
1785
+ "epoch": 1.1224739742804655,
1786
+ "grad_norm": 0.22378048300743103,
1787
+ "learning_rate": 9.884358979992852e-05,
1788
+ "loss": 0.7039,
1789
+ "step": 1145
1790
+ },
1791
+ {
1792
+ "epoch": 1.127372933251684,
1793
+ "grad_norm": 0.22920195758342743,
1794
+ "learning_rate": 9.873317502592563e-05,
1795
+ "loss": 0.6932,
1796
+ "step": 1150
1797
+ },
1798
+ {
1799
+ "epoch": 1.127372933251684,
1800
+ "eval_loss": 0.6847204566001892,
1801
+ "eval_runtime": 15.5644,
1802
+ "eval_samples_per_second": 125.864,
1803
+ "eval_steps_per_second": 15.741,
1804
+ "step": 1150
1805
  }
1806
  ],
1807
  "logging_steps": 5,
 
1830
  "attributes": {}
1831
  }
1832
  },
1833
+ "total_flos": 5.936913961881436e+17,
1834
  "train_batch_size": 2,
1835
  "trial_name": null,
1836
  "trial_params": null