NairaRahim committed on
Commit
da7fd85
·
verified ·
1 Parent(s): 1946fd9

Training in progress, epoch 17, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e1ea03da8d9a978320d45f1bc6677407a85624af3d9baa7bae32e5c03676367
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c4ff7c723a1e608d989b96e44d892070778ec83c952e55c57dddf3c3f48178
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98a2483ec61025369cf6eb8fec5397cf636bfb0ffa7a3eedf987ef5b4b9d5c8
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d0a831ac586fb1be369d61d6f0cc41522b60258bcdd25b76e42085747f41185
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffc97010f20f826b75fdc09ec365ad76a45dfcdc64194b72334d5902c2cf28eb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46332136f176afc146e73eb2e78a93d7beb2c41f4d0c62f6c39855d4ba7b1979
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5384c34df266d1db083f57452aa67b48a3012f0aeee7f4ad7194984e89d75fe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:855574b974c7e1ac9f8ed715f000f5b33d3a42b4a2102f47eec78477a0831f87
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
- "epoch": 16.0,
5
  "eval_steps": 500,
6
- "global_step": 20880,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1591,6 +1591,105 @@
1591
  "eval_samples_per_second": 26.46,
1592
  "eval_steps_per_second": 3.325,
1593
  "step": 20880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
  }
1595
  ],
1596
  "logging_steps": 100,
@@ -1605,7 +1704,7 @@
1605
  "early_stopping_threshold": 0.0
1606
  },
1607
  "attributes": {
1608
- "early_stopping_patience_counter": 0
1609
  }
1610
  },
1611
  "TrainerControl": {
@@ -1619,7 +1718,7 @@
1619
  "attributes": {}
1620
  }
1621
  },
1622
- "total_flos": 2.251697444875469e+16,
1623
  "train_batch_size": 8,
1624
  "trial_name": null,
1625
  "trial_params": null
 
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
+ "epoch": 17.0,
5
  "eval_steps": 500,
6
+ "global_step": 22185,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1591
  "eval_samples_per_second": 26.46,
1592
  "eval_steps_per_second": 3.325,
1593
  "step": 20880
1594
+ },
1595
+ {
1596
+ "epoch": 16.015325670498083,
1597
+ "grad_norm": 2.970867156982422,
1598
+ "learning_rate": 3.999473180076628e-05,
1599
+ "loss": 33.5118,
1600
+ "step": 20900
1601
+ },
1602
+ {
1603
+ "epoch": 16.091954022988507,
1604
+ "grad_norm": 2.395005464553833,
1605
+ "learning_rate": 3.994683908045978e-05,
1606
+ "loss": 34.1932,
1607
+ "step": 21000
1608
+ },
1609
+ {
1610
+ "epoch": 16.168582375478927,
1611
+ "grad_norm": 2.8175065517425537,
1612
+ "learning_rate": 3.9898946360153264e-05,
1613
+ "loss": 32.9815,
1614
+ "step": 21100
1615
+ },
1616
+ {
1617
+ "epoch": 16.245210727969347,
1618
+ "grad_norm": 4.665389537811279,
1619
+ "learning_rate": 3.985105363984675e-05,
1620
+ "loss": 33.8616,
1621
+ "step": 21200
1622
+ },
1623
+ {
1624
+ "epoch": 16.32183908045977,
1625
+ "grad_norm": 3.425340175628662,
1626
+ "learning_rate": 3.980316091954023e-05,
1627
+ "loss": 33.2022,
1628
+ "step": 21300
1629
+ },
1630
+ {
1631
+ "epoch": 16.39846743295019,
1632
+ "grad_norm": 5.212127685546875,
1633
+ "learning_rate": 3.975574712643678e-05,
1634
+ "loss": 33.3935,
1635
+ "step": 21400
1636
+ },
1637
+ {
1638
+ "epoch": 16.47509578544061,
1639
+ "grad_norm": 1.9034606218338013,
1640
+ "learning_rate": 3.970785440613027e-05,
1641
+ "loss": 32.739,
1642
+ "step": 21500
1643
+ },
1644
+ {
1645
+ "epoch": 16.551724137931036,
1646
+ "grad_norm": 2.024109125137329,
1647
+ "learning_rate": 3.9659961685823756e-05,
1648
+ "loss": 33.4628,
1649
+ "step": 21600
1650
+ },
1651
+ {
1652
+ "epoch": 16.628352490421456,
1653
+ "grad_norm": 2.8185606002807617,
1654
+ "learning_rate": 3.961206896551724e-05,
1655
+ "loss": 33.7672,
1656
+ "step": 21700
1657
+ },
1658
+ {
1659
+ "epoch": 16.704980842911876,
1660
+ "grad_norm": 3.2981534004211426,
1661
+ "learning_rate": 3.956417624521073e-05,
1662
+ "loss": 33.1976,
1663
+ "step": 21800
1664
+ },
1665
+ {
1666
+ "epoch": 16.7816091954023,
1667
+ "grad_norm": 4.531330585479736,
1668
+ "learning_rate": 3.951628352490422e-05,
1669
+ "loss": 33.2379,
1670
+ "step": 21900
1671
+ },
1672
+ {
1673
+ "epoch": 16.85823754789272,
1674
+ "grad_norm": 2.4455623626708984,
1675
+ "learning_rate": 3.9468390804597704e-05,
1676
+ "loss": 33.2898,
1677
+ "step": 22000
1678
+ },
1679
+ {
1680
+ "epoch": 16.93486590038314,
1681
+ "grad_norm": 4.1596245765686035,
1682
+ "learning_rate": 3.942049808429119e-05,
1683
+ "loss": 33.2167,
1684
+ "step": 22100
1685
+ },
1686
+ {
1687
+ "epoch": 17.0,
1688
+ "eval_loss": 34.65380096435547,
1689
+ "eval_runtime": 49.3114,
1690
+ "eval_samples_per_second": 26.464,
1691
+ "eval_steps_per_second": 3.326,
1692
+ "step": 22185
1693
  }
1694
  ],
1695
  "logging_steps": 100,
 
1704
  "early_stopping_threshold": 0.0
1705
  },
1706
  "attributes": {
1707
+ "early_stopping_patience_counter": 1
1708
  }
1709
  },
1710
  "TrainerControl": {
 
1718
  "attributes": {}
1719
  }
1720
  },
1721
+ "total_flos": 2.3924285351801856e+16,
1722
  "train_batch_size": 8,
1723
  "trial_name": null,
1724
  "trial_params": null