ljcamargo commited on
Commit
81e9ca5
·
verified ·
1 Parent(s): 8d3b340

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be9b9b0d8b856b09170cefdadbd16a208f59b19d453d160ff7ee756de359ba83
3
  size 3809184360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ab03b183d936d545f20f6f1f9be6a3aa197747199a4e815057776bb6c269e4
3
  size 3809184360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe10a4a37a5566b28dd2f1e582ef1d8e2ca9b9b974439cc128734b1d2c7295fc
3
- size 2457459557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b257bea1faadea9daefdeba8b5752cbc2ae0a9dc4ba5bb9f915892b9844ff02a
3
+ size 2458291491
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14f56edc583c1a07de7d3d9b93b5a6d1d2a82aa89cc60af96632062d66edfffd
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210c5979df71a02690e406313b0596a8bb2750ef986d8191a5bc9ac5b46f7a7c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf779ed14a82f5b8a151c3c4f681c8e668acf0a37d984440af016adca1961c5d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f673d0030ffcf7d3f3fcb41b5abd69836d05a3195750de18ed9b6eb87259b06a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6,
6
  "eval_steps": 500,
7
- "global_step": 2250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1583,6 +1583,181 @@
1583
  "learning_rate": 2.0283018867924532e-05,
1584
  "loss": 0.4407,
1585
  "step": 2250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1586
  }
1587
  ],
1588
  "logging_steps": 10,
@@ -1602,7 +1777,7 @@
1602
  "attributes": {}
1603
  }
1604
  },
1605
- "total_flos": 4.05919393302528e+16,
1606
  "train_batch_size": 2,
1607
  "trial_name": null,
1608
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6666666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1583
  "learning_rate": 2.0283018867924532e-05,
1584
  "loss": 0.4407,
1585
  "step": 2250
1586
+ },
1587
+ {
1588
+ "epoch": 0.6026666666666667,
1589
+ "grad_norm": 19.082717895507812,
1590
+ "learning_rate": 2.0148247978436658e-05,
1591
+ "loss": 0.6069,
1592
+ "step": 2260
1593
+ },
1594
+ {
1595
+ "epoch": 0.6053333333333333,
1596
+ "grad_norm": 9.956210136413574,
1597
+ "learning_rate": 2.0013477088948788e-05,
1598
+ "loss": 0.3804,
1599
+ "step": 2270
1600
+ },
1601
+ {
1602
+ "epoch": 0.608,
1603
+ "grad_norm": 22.91667938232422,
1604
+ "learning_rate": 1.9878706199460917e-05,
1605
+ "loss": 0.4452,
1606
+ "step": 2280
1607
+ },
1608
+ {
1609
+ "epoch": 0.6106666666666667,
1610
+ "grad_norm": 11.472311973571777,
1611
+ "learning_rate": 1.9743935309973047e-05,
1612
+ "loss": 0.5524,
1613
+ "step": 2290
1614
+ },
1615
+ {
1616
+ "epoch": 0.6133333333333333,
1617
+ "grad_norm": 7.967957019805908,
1618
+ "learning_rate": 1.9609164420485177e-05,
1619
+ "loss": 0.3626,
1620
+ "step": 2300
1621
+ },
1622
+ {
1623
+ "epoch": 0.616,
1624
+ "grad_norm": 8.504613876342773,
1625
+ "learning_rate": 1.9474393530997306e-05,
1626
+ "loss": 0.2569,
1627
+ "step": 2310
1628
+ },
1629
+ {
1630
+ "epoch": 0.6186666666666667,
1631
+ "grad_norm": 11.852409362792969,
1632
+ "learning_rate": 1.9339622641509436e-05,
1633
+ "loss": 0.4051,
1634
+ "step": 2320
1635
+ },
1636
+ {
1637
+ "epoch": 0.6213333333333333,
1638
+ "grad_norm": 12.512871742248535,
1639
+ "learning_rate": 1.9204851752021562e-05,
1640
+ "loss": 0.3631,
1641
+ "step": 2330
1642
+ },
1643
+ {
1644
+ "epoch": 0.624,
1645
+ "grad_norm": 16.21477508544922,
1646
+ "learning_rate": 1.9070080862533692e-05,
1647
+ "loss": 0.7231,
1648
+ "step": 2340
1649
+ },
1650
+ {
1651
+ "epoch": 0.6266666666666667,
1652
+ "grad_norm": 12.807674407958984,
1653
+ "learning_rate": 1.893530997304582e-05,
1654
+ "loss": 0.4406,
1655
+ "step": 2350
1656
+ },
1657
+ {
1658
+ "epoch": 0.6293333333333333,
1659
+ "grad_norm": 8.169772148132324,
1660
+ "learning_rate": 1.880053908355795e-05,
1661
+ "loss": 0.5181,
1662
+ "step": 2360
1663
+ },
1664
+ {
1665
+ "epoch": 0.632,
1666
+ "grad_norm": 11.73438549041748,
1667
+ "learning_rate": 1.8665768194070084e-05,
1668
+ "loss": 0.392,
1669
+ "step": 2370
1670
+ },
1671
+ {
1672
+ "epoch": 0.6346666666666667,
1673
+ "grad_norm": 19.76089096069336,
1674
+ "learning_rate": 1.8530997304582214e-05,
1675
+ "loss": 0.4631,
1676
+ "step": 2380
1677
+ },
1678
+ {
1679
+ "epoch": 0.6373333333333333,
1680
+ "grad_norm": 9.086039543151855,
1681
+ "learning_rate": 1.839622641509434e-05,
1682
+ "loss": 0.3171,
1683
+ "step": 2390
1684
+ },
1685
+ {
1686
+ "epoch": 0.64,
1687
+ "grad_norm": 16.056007385253906,
1688
+ "learning_rate": 1.826145552560647e-05,
1689
+ "loss": 0.4297,
1690
+ "step": 2400
1691
+ },
1692
+ {
1693
+ "epoch": 0.6426666666666667,
1694
+ "grad_norm": 4.929039001464844,
1695
+ "learning_rate": 1.81266846361186e-05,
1696
+ "loss": 0.3565,
1697
+ "step": 2410
1698
+ },
1699
+ {
1700
+ "epoch": 0.6453333333333333,
1701
+ "grad_norm": 14.620661735534668,
1702
+ "learning_rate": 1.799191374663073e-05,
1703
+ "loss": 0.4003,
1704
+ "step": 2420
1705
+ },
1706
+ {
1707
+ "epoch": 0.648,
1708
+ "grad_norm": 11.428451538085938,
1709
+ "learning_rate": 1.785714285714286e-05,
1710
+ "loss": 0.3116,
1711
+ "step": 2430
1712
+ },
1713
+ {
1714
+ "epoch": 0.6506666666666666,
1715
+ "grad_norm": 7.882524490356445,
1716
+ "learning_rate": 1.7722371967654988e-05,
1717
+ "loss": 0.3754,
1718
+ "step": 2440
1719
+ },
1720
+ {
1721
+ "epoch": 0.6533333333333333,
1722
+ "grad_norm": 8.926907539367676,
1723
+ "learning_rate": 1.7587601078167118e-05,
1724
+ "loss": 0.3893,
1725
+ "step": 2450
1726
+ },
1727
+ {
1728
+ "epoch": 0.656,
1729
+ "grad_norm": 8.45529842376709,
1730
+ "learning_rate": 1.7452830188679244e-05,
1731
+ "loss": 0.4121,
1732
+ "step": 2460
1733
+ },
1734
+ {
1735
+ "epoch": 0.6586666666666666,
1736
+ "grad_norm": 8.348902702331543,
1737
+ "learning_rate": 1.7318059299191374e-05,
1738
+ "loss": 0.243,
1739
+ "step": 2470
1740
+ },
1741
+ {
1742
+ "epoch": 0.6613333333333333,
1743
+ "grad_norm": 7.9367852210998535,
1744
+ "learning_rate": 1.7183288409703503e-05,
1745
+ "loss": 0.2918,
1746
+ "step": 2480
1747
+ },
1748
+ {
1749
+ "epoch": 0.664,
1750
+ "grad_norm": 7.673737525939941,
1751
+ "learning_rate": 1.7048517520215633e-05,
1752
+ "loss": 0.4514,
1753
+ "step": 2490
1754
+ },
1755
+ {
1756
+ "epoch": 0.6666666666666666,
1757
+ "grad_norm": 17.945858001708984,
1758
+ "learning_rate": 1.6913746630727763e-05,
1759
+ "loss": 0.2516,
1760
+ "step": 2500
1761
  }
1762
  ],
1763
  "logging_steps": 10,
 
1777
  "attributes": {}
1778
  }
1779
  },
1780
+ "total_flos": 4.51119326667264e+16,
1781
  "train_batch_size": 2,
1782
  "trial_name": null,
1783
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbbcd8d1d06fb20c3b9e3291ad1f542ea351b7af728b87b6a77dba47653c0669
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a68c2e906998593cf5a919298e03a1004101f2e08452617953a4c1c4243b097
3
  size 6289