rootxhacker commited on
Commit
c6249f1
·
verified ·
1 Parent(s): 7fb947c

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99a737d1188d26749e1f79393398cf632ff62d367413aa544169176d1155d64
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbda29e3fc4574d26f4c2d88945024a17cf0a9095cfd76acea4713a70561f4b9
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bfc7b1dae63d312ce05a66fb9cd5a96fb16e5d6a11cfa4ba0f11944d2a8a072
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f807c16f8bec038522bfe252b213fc1087e8a39fb1f495399cdf7ee1d92e00a6
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c4598e219bb7f2bb5d2c34aa1d6d86aefb320efdbbe18af0ce36833a2da16e9
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b4e6f78506ab09c8a3ff311a2dfeb7a6190585c49701c5bd2fcc763c2b50448
3
+ size 14180
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80bec9d89af6c0ab5db061d313a8506f3b43411e72ccae614261263d0bf59b7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5057516ff1b0b207608fc5bf21c504e3b16c8f39cb674a438cf642593270922
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10022e052ca5e66e7d62da25b0a5c62caae94a4e337df0aab0b25e792e2f0920
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5349680f22d749c8735deb8f593d381f787d0e5d89e99661139aee18144bbf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 9500,
3
  "best_metric": 4.328299045562744,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-9500",
5
- "epoch": 0.7307130220752249,
6
  "eval_steps": 250,
7
- "global_step": 9500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1642,6 +1642,92 @@
1642
  "eval_samples_per_second": 53.014,
1643
  "eval_steps_per_second": 13.253,
1644
  "step": 9500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1645
  }
1646
  ],
1647
  "logging_steps": 50,
 
2
  "best_global_step": 9500,
3
  "best_metric": 4.328299045562744,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-9500",
5
+ "epoch": 0.7691716021844474,
6
  "eval_steps": 250,
7
+ "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1642
  "eval_samples_per_second": 53.014,
1643
  "eval_steps_per_second": 13.253,
1644
  "step": 9500
1645
+ },
1646
+ {
1647
+ "epoch": 0.7345588800861472,
1648
+ "grad_norm": 3.9685990810394287,
1649
+ "learning_rate": 9.141355097239072e-05,
1650
+ "loss": 4.325,
1651
+ "step": 9550
1652
+ },
1653
+ {
1654
+ "epoch": 0.7384047380970694,
1655
+ "grad_norm": 5.303285121917725,
1656
+ "learning_rate": 9.11562074923716e-05,
1657
+ "loss": 4.4277,
1658
+ "step": 9600
1659
+ },
1660
+ {
1661
+ "epoch": 0.7422505961079917,
1662
+ "grad_norm": 2.70599627494812,
1663
+ "learning_rate": 9.089886401235249e-05,
1664
+ "loss": 4.4329,
1665
+ "step": 9650
1666
+ },
1667
+ {
1668
+ "epoch": 0.7460964541189139,
1669
+ "grad_norm": 4.711449146270752,
1670
+ "learning_rate": 9.064152053233336e-05,
1671
+ "loss": 4.251,
1672
+ "step": 9700
1673
+ },
1674
+ {
1675
+ "epoch": 0.7499423121298362,
1676
+ "grad_norm": 3.0169851779937744,
1677
+ "learning_rate": 9.038417705231424e-05,
1678
+ "loss": 4.3483,
1679
+ "step": 9750
1680
+ },
1681
+ {
1682
+ "epoch": 0.7499423121298362,
1683
+ "eval_loss": 4.341108322143555,
1684
+ "eval_runtime": 18.9063,
1685
+ "eval_samples_per_second": 52.893,
1686
+ "eval_steps_per_second": 13.223,
1687
+ "step": 9750
1688
+ },
1689
+ {
1690
+ "epoch": 0.7537881701407584,
1691
+ "grad_norm": 3.375880002975464,
1692
+ "learning_rate": 9.012683357229513e-05,
1693
+ "loss": 4.313,
1694
+ "step": 9800
1695
+ },
1696
+ {
1697
+ "epoch": 0.7576340281516807,
1698
+ "grad_norm": 1.707850456237793,
1699
+ "learning_rate": 8.986949009227601e-05,
1700
+ "loss": 4.3062,
1701
+ "step": 9850
1702
+ },
1703
+ {
1704
+ "epoch": 0.7614798861626029,
1705
+ "grad_norm": 3.6718738079071045,
1706
+ "learning_rate": 8.96121466122569e-05,
1707
+ "loss": 4.4415,
1708
+ "step": 9900
1709
+ },
1710
+ {
1711
+ "epoch": 0.7653257441735252,
1712
+ "grad_norm": 3.5382699966430664,
1713
+ "learning_rate": 8.935480313223778e-05,
1714
+ "loss": 4.3754,
1715
+ "step": 9950
1716
+ },
1717
+ {
1718
+ "epoch": 0.7691716021844474,
1719
+ "grad_norm": 4.678229808807373,
1720
+ "learning_rate": 8.909745965221865e-05,
1721
+ "loss": 4.4404,
1722
+ "step": 10000
1723
+ },
1724
+ {
1725
+ "epoch": 0.7691716021844474,
1726
+ "eval_loss": 4.3746819496154785,
1727
+ "eval_runtime": 18.7221,
1728
+ "eval_samples_per_second": 53.413,
1729
+ "eval_steps_per_second": 13.353,
1730
+ "step": 10000
1731
  }
1732
  ],
1733
  "logging_steps": 50,