alicegoesdown committed on
Commit
91efca9
·
verified ·
1 Parent(s): 0198b14

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e8b8c1e9c0bce3f00131df71d272a082ce5dcf2d99759019d2b0659c78315e
3
  size 1130395064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30fe7e1571759f904f96e96e35fb771537a64a4595d3f408a3013c5e45ab7675
3
  size 1130395064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6188e4d76c0b4e37ea00b0a3b23f5b7084262ecb5f16dd3e7bcbf521356872ef
3
  size 2260919034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825681975624fe9e5a6487a56696a7029ffc7f55b92c5278918c1e8004acefda
3
  size 2260919034
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:347168a1ca5a3d3959109211d8471dcf62bcaa6f64d8a927b670d1eeeb82ee9c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077c0ad5fe3c016a80345ab1197859150afba028dca4b16043c90b58d29ddacd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb8d370aad2d5fbcf199f24dc52e08b4648a77d17fa77f60c7ea0dbb61e6a008
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91fe12062159a4c96d50e57746d4f5b13fabfbae70b7575964aa5efb771f8a0a
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.7938833236694336,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
- "epoch": 2.681992337164751,
5
  "eval_steps": 150,
6
- "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1589,6 +1589,119 @@
1589
  "eval_samples_per_second": 11.053,
1590
  "eval_steps_per_second": 11.053,
1591
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1592
  }
1593
  ],
1594
  "logging_steps": 10,
@@ -1608,7 +1721,7 @@
1608
  "attributes": {}
1609
  }
1610
  },
1611
- "total_flos": 2.2711771292829696e+17,
1612
  "train_batch_size": 16,
1613
  "trial_name": null,
1614
  "trial_params": null
 
1
  {
2
  "best_metric": 1.7938833236694336,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
+ "epoch": 2.873563218390805,
5
  "eval_steps": 150,
6
+ "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1589
  "eval_samples_per_second": 11.053,
1590
  "eval_steps_per_second": 11.053,
1591
  "step": 2100
1592
+ },
1593
+ {
1594
+ "epoch": 2.694763729246488,
1595
+ "grad_norm": 4.319637775421143,
1596
+ "learning_rate": 7.521673237007275e-06,
1597
+ "loss": 1.5039,
1598
+ "step": 2110
1599
+ },
1600
+ {
1601
+ "epoch": 2.707535121328225,
1602
+ "grad_norm": 4.250217914581299,
1603
+ "learning_rate": 7.518344931171188e-06,
1604
+ "loss": 1.4429,
1605
+ "step": 2120
1606
+ },
1607
+ {
1608
+ "epoch": 2.7203065134099615,
1609
+ "grad_norm": 4.189040184020996,
1610
+ "learning_rate": 7.5148628716184005e-06,
1611
+ "loss": 1.4521,
1612
+ "step": 2130
1613
+ },
1614
+ {
1615
+ "epoch": 2.7330779054916987,
1616
+ "grad_norm": 4.170867919921875,
1617
+ "learning_rate": 7.5112272014827445e-06,
1618
+ "loss": 1.4094,
1619
+ "step": 2140
1620
+ },
1621
+ {
1622
+ "epoch": 2.7458492975734354,
1623
+ "grad_norm": 4.322047233581543,
1624
+ "learning_rate": 7.507438070212378e-06,
1625
+ "loss": 1.4112,
1626
+ "step": 2150
1627
+ },
1628
+ {
1629
+ "epoch": 2.7586206896551726,
1630
+ "grad_norm": 3.8870387077331543,
1631
+ "learning_rate": 7.5034956335636445e-06,
1632
+ "loss": 1.4716,
1633
+ "step": 2160
1634
+ },
1635
+ {
1636
+ "epoch": 2.7713920817369093,
1637
+ "grad_norm": 4.1431474685668945,
1638
+ "learning_rate": 7.499400053594676e-06,
1639
+ "loss": 1.4712,
1640
+ "step": 2170
1641
+ },
1642
+ {
1643
+ "epoch": 2.784163473818646,
1644
+ "grad_norm": 4.3033599853515625,
1645
+ "learning_rate": 7.495151498658724e-06,
1646
+ "loss": 1.4473,
1647
+ "step": 2180
1648
+ },
1649
+ {
1650
+ "epoch": 2.796934865900383,
1651
+ "grad_norm": 4.849308013916016,
1652
+ "learning_rate": 7.490750143397239e-06,
1653
+ "loss": 1.5007,
1654
+ "step": 2190
1655
+ },
1656
+ {
1657
+ "epoch": 2.80970625798212,
1658
+ "grad_norm": 4.182689189910889,
1659
+ "learning_rate": 7.486196168732699e-06,
1660
+ "loss": 1.4584,
1661
+ "step": 2200
1662
+ },
1663
+ {
1664
+ "epoch": 2.822477650063857,
1665
+ "grad_norm": 4.407222270965576,
1666
+ "learning_rate": 7.481489761861165e-06,
1667
+ "loss": 1.4464,
1668
+ "step": 2210
1669
+ },
1670
+ {
1671
+ "epoch": 2.835249042145594,
1672
+ "grad_norm": 4.179383754730225,
1673
+ "learning_rate": 7.476631116244589e-06,
1674
+ "loss": 1.4528,
1675
+ "step": 2220
1676
+ },
1677
+ {
1678
+ "epoch": 2.8480204342273305,
1679
+ "grad_norm": 4.344815731048584,
1680
+ "learning_rate": 7.471620431602864e-06,
1681
+ "loss": 1.441,
1682
+ "step": 2230
1683
+ },
1684
+ {
1685
+ "epoch": 2.8607918263090677,
1686
+ "grad_norm": 4.661447525024414,
1687
+ "learning_rate": 7.466457913905607e-06,
1688
+ "loss": 1.4779,
1689
+ "step": 2240
1690
+ },
1691
+ {
1692
+ "epoch": 2.873563218390805,
1693
+ "grad_norm": 4.431833744049072,
1694
+ "learning_rate": 7.461143775363704e-06,
1695
+ "loss": 1.4027,
1696
+ "step": 2250
1697
+ },
1698
+ {
1699
+ "epoch": 2.873563218390805,
1700
+ "eval_loss": 1.819319248199463,
1701
+ "eval_runtime": 46.1899,
1702
+ "eval_samples_per_second": 10.825,
1703
+ "eval_steps_per_second": 10.825,
1704
+ "step": 2250
1705
  }
1706
  ],
1707
  "logging_steps": 10,
 
1721
  "attributes": {}
1722
  }
1723
  },
1724
+ "total_flos": 2.435145735461806e+17,
1725
  "train_batch_size": 16,
1726
  "trial_name": null,
1727
  "trial_params": null