FormlessAI commited on
Commit
639429b
·
verified ·
1 Parent(s): f8f563c

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd912481a3f112c909bdf7772c4de3e0416c463e31dab3b572e8cd054f229188
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44441ec494dd1eeddb4f4b1f003d97643c00cca698aa10a2254f4b4bdacb8704
3
  size 98088784
last-checkpoint/global_step1100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dee808df81fc01618f166dd9adc8f410006ed9e8e5bdfbe48da6338752ec172
3
+ size 73939813
last-checkpoint/global_step1100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff19cac0e35b930e98f201f70574798e72b0216e01348755c4e6a66033319aa5
3
+ size 73939813
last-checkpoint/global_step1100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:392b29b2fbff6661a95bc8e5314c5f3f6d23ecd4140810512c3f17d4227567a0
3
+ size 73939877
last-checkpoint/global_step1100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126b4cb8d8675f0b95af0b9b07199ccc0b35045b0f45a165f3677b5406ba7b15
3
+ size 73939877
last-checkpoint/global_step1100/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472d3069e30cd1792f3b039921a033eb3a5631d786c055af3ce407fa6487cfb6
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1050
 
1
+ global_step1100
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd147d7cadee51c1cd4b7a96239e8821ac45609799ce3f758d85d65a610652a3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceb0ba0fefc4682de8ae9d502be348b266aef51d2c517ea10d576e3957cf16e
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dcbc055b03388dea41e9d71af92bbf514f0751a8067ba6941669b1f09b60b00
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f0589f852327dcbb3a04372c5c9b3b3aed87183a18e4e78c8842af6ccc94ea
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:193c368d03fd736fbd74394414f3c4a9e31293e937e2453367fe03c25a1ccf85
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4aa42ba29fbaf89d327737bbdbe96fa7085e909f789a4b592724ea39fd0491
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d4cc4dd1df4d4e124e948ad70a963d30df355b64c752b74477c3468b82fe011
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed1a940d9e87126bc4746d90070268ad6d65dcc8b4794a5c83d93738db2dc6b
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a787b00c6cfa48c9ffd14b578310dac54fd359154d3100e20e3ed9a383ff3597
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41aec0a0f7fd8e266c974eb692fe1a8c668e3b6745d80b43c921e581b091927b
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6898888945579529,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0293937538273117,
6
  "eval_steps": 50,
7
- "global_step": 1050,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1646,6 +1646,84 @@
1646
  "eval_samples_per_second": 126.592,
1647
  "eval_steps_per_second": 15.832,
1648
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1649
  }
1650
  ],
1651
  "logging_steps": 5,
@@ -1674,7 +1752,7 @@
1674
  "attributes": {}
1675
  }
1676
  },
1677
- "total_flos": 5.42831958984491e+17,
1678
  "train_batch_size": 2,
1679
  "trial_name": null,
1680
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6880703568458557,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0783833435394978,
6
  "eval_steps": 50,
7
+ "global_step": 1100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1646
  "eval_samples_per_second": 126.592,
1647
  "eval_steps_per_second": 15.832,
1648
  "step": 1050
1649
+ },
1650
+ {
1651
+ "epoch": 1.0342927127985304,
1652
+ "grad_norm": 0.22498337924480438,
1653
+ "learning_rate": 0.0001007536332725504,
1654
+ "loss": 0.7153,
1655
+ "step": 1055
1656
+ },
1657
+ {
1658
+ "epoch": 1.039191671769749,
1659
+ "grad_norm": 0.19475223124027252,
1660
+ "learning_rate": 0.00010065142396828989,
1661
+ "loss": 0.6969,
1662
+ "step": 1060
1663
+ },
1664
+ {
1665
+ "epoch": 1.0440906307409676,
1666
+ "grad_norm": 0.20079198479652405,
1667
+ "learning_rate": 0.00010054874962164521,
1668
+ "loss": 0.6906,
1669
+ "step": 1065
1670
+ },
1671
+ {
1672
+ "epoch": 1.0489895897121861,
1673
+ "grad_norm": 0.18500946462154388,
1674
+ "learning_rate": 0.00010044561128775412,
1675
+ "loss": 0.7027,
1676
+ "step": 1070
1677
+ },
1678
+ {
1679
+ "epoch": 1.0538885486834049,
1680
+ "grad_norm": 0.18668654561042786,
1681
+ "learning_rate": 0.0001003420100265226,
1682
+ "loss": 0.7157,
1683
+ "step": 1075
1684
+ },
1685
+ {
1686
+ "epoch": 1.0587875076546234,
1687
+ "grad_norm": 0.21674495935440063,
1688
+ "learning_rate": 0.00010023794690261389,
1689
+ "loss": 0.7208,
1690
+ "step": 1080
1691
+ },
1692
+ {
1693
+ "epoch": 1.063686466625842,
1694
+ "grad_norm": 0.20600494742393494,
1695
+ "learning_rate": 0.0001001334229854376,
1696
+ "loss": 0.6957,
1697
+ "step": 1085
1698
+ },
1699
+ {
1700
+ "epoch": 1.0685854255970606,
1701
+ "grad_norm": 0.2198040932416916,
1702
+ "learning_rate": 0.0001000284393491387,
1703
+ "loss": 0.7059,
1704
+ "step": 1090
1705
+ },
1706
+ {
1707
+ "epoch": 1.0734843845682793,
1708
+ "grad_norm": 0.225518599152565,
1709
+ "learning_rate": 9.99229970725865e-05,
1710
+ "loss": 0.7017,
1711
+ "step": 1095
1712
+ },
1713
+ {
1714
+ "epoch": 1.0783833435394978,
1715
+ "grad_norm": 0.2226964235305786,
1716
+ "learning_rate": 9.981709723936353e-05,
1717
+ "loss": 0.6967,
1718
+ "step": 1100
1719
+ },
1720
+ {
1721
+ "epoch": 1.0783833435394978,
1722
+ "eval_loss": 0.6880703568458557,
1723
+ "eval_runtime": 15.55,
1724
+ "eval_samples_per_second": 125.981,
1725
+ "eval_steps_per_second": 15.756,
1726
+ "step": 1100
1727
  }
1728
  ],
1729
  "logging_steps": 5,
 
1752
  "attributes": {}
1753
  }
1754
  },
1755
+ "total_flos": 5.6795728734846976e+17,
1756
  "train_batch_size": 2,
1757
  "trial_name": null,
1758
  "trial_params": null