rootxhacker commited on
Commit
003903a
·
verified ·
1 Parent(s): d769cf0

Training in progress, step 9500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eecb66b5b687b2480b18fa49a3a7093840558d372a33d29f18cc9ddc9d5973f6
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99a737d1188d26749e1f79393398cf632ff62d367413aa544169176d1155d64
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:246b4fcf33df56c5b498c44dfc6d12184de263d7f241a380037b6387910c9a75
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bfc7b1dae63d312ce05a66fb9cd5a96fb16e5d6a11cfa4ba0f11944d2a8a072
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b68f148982346537acf196edf0aa44542990dee8efc3893aa00dae2ca2e993b5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4598e219bb7f2bb5d2c34aa1d6d86aefb320efdbbe18af0ce36833a2da16e9
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83c5d85e32786b1e35a549f6e7bfc25b63f0617678a2a77d09f7e94475702a2a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c80bec9d89af6c0ab5db061d313a8506f3b43411e72ccae614261263d0bf59b7
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9af065edeaca88f16d8fcf52e4c33f05f28955d2f4f45d569975711d2168cd7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10022e052ca5e66e7d62da25b0a5c62caae94a4e337df0aab0b25e792e2f0920
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 8500,
3
- "best_metric": 4.392988204956055,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
5
- "epoch": 0.6922544419660026,
6
  "eval_steps": 250,
7
- "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1556,6 +1556,92 @@
1556
  "eval_samples_per_second": 53.269,
1557
  "eval_steps_per_second": 13.317,
1558
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1559
  }
1560
  ],
1561
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 9500,
3
+ "best_metric": 4.328299045562744,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-9500",
5
+ "epoch": 0.7307130220752249,
6
  "eval_steps": 250,
7
+ "global_step": 9500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1556
  "eval_samples_per_second": 53.269,
1557
  "eval_steps_per_second": 13.317,
1558
  "step": 9000
1559
+ },
1560
+ {
1561
+ "epoch": 0.6961002999769248,
1562
+ "grad_norm": 9.26623821258545,
1563
+ "learning_rate": 9.39869857725819e-05,
1564
+ "loss": 4.3073,
1565
+ "step": 9050
1566
+ },
1567
+ {
1568
+ "epoch": 0.6999461579878471,
1569
+ "grad_norm": 3.5981953144073486,
1570
+ "learning_rate": 9.372964229256276e-05,
1571
+ "loss": 4.3923,
1572
+ "step": 9100
1573
+ },
1574
+ {
1575
+ "epoch": 0.7037920159987693,
1576
+ "grad_norm": 3.734813690185547,
1577
+ "learning_rate": 9.347229881254364e-05,
1578
+ "loss": 4.2449,
1579
+ "step": 9150
1580
+ },
1581
+ {
1582
+ "epoch": 0.7076378740096916,
1583
+ "grad_norm": 5.646871566772461,
1584
+ "learning_rate": 9.321495533252453e-05,
1585
+ "loss": 4.3953,
1586
+ "step": 9200
1587
+ },
1588
+ {
1589
+ "epoch": 0.7114837320206138,
1590
+ "grad_norm": 4.284733295440674,
1591
+ "learning_rate": 9.295761185250541e-05,
1592
+ "loss": 4.475,
1593
+ "step": 9250
1594
+ },
1595
+ {
1596
+ "epoch": 0.7114837320206138,
1597
+ "eval_loss": 4.348310470581055,
1598
+ "eval_runtime": 19.0285,
1599
+ "eval_samples_per_second": 52.553,
1600
+ "eval_steps_per_second": 13.138,
1601
+ "step": 9250
1602
+ },
1603
+ {
1604
+ "epoch": 0.7153295900315361,
1605
+ "grad_norm": 5.92791223526001,
1606
+ "learning_rate": 9.27002683724863e-05,
1607
+ "loss": 4.5493,
1608
+ "step": 9300
1609
+ },
1610
+ {
1611
+ "epoch": 0.7191754480424583,
1612
+ "grad_norm": 4.768808841705322,
1613
+ "learning_rate": 9.244292489246719e-05,
1614
+ "loss": 4.2508,
1615
+ "step": 9350
1616
+ },
1617
+ {
1618
+ "epoch": 0.7230213060533806,
1619
+ "grad_norm": 3.473097562789917,
1620
+ "learning_rate": 9.218558141244805e-05,
1621
+ "loss": 4.4534,
1622
+ "step": 9400
1623
+ },
1624
+ {
1625
+ "epoch": 0.7268671640643027,
1626
+ "grad_norm": 10.189091682434082,
1627
+ "learning_rate": 9.192823793242895e-05,
1628
+ "loss": 4.3883,
1629
+ "step": 9450
1630
+ },
1631
+ {
1632
+ "epoch": 0.7307130220752249,
1633
+ "grad_norm": 1.9577853679656982,
1634
+ "learning_rate": 9.167089445240982e-05,
1635
+ "loss": 4.3191,
1636
+ "step": 9500
1637
+ },
1638
+ {
1639
+ "epoch": 0.7307130220752249,
1640
+ "eval_loss": 4.328299045562744,
1641
+ "eval_runtime": 18.8631,
1642
+ "eval_samples_per_second": 53.014,
1643
+ "eval_steps_per_second": 13.253,
1644
+ "step": 9500
1645
  }
1646
  ],
1647
  "logging_steps": 50,