FredericFan committed on
Commit
7021782
·
verified ·
1 Parent(s): 6f179e9

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aacaf54054c9481ab96f4c124aec209c4d1863401659f98c84a616c7cf3b550
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d578e67cefa0cbc87c4588c8c5826c14d9ecd5d9fbb6cb344342920af139033
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8f332eb9f4d28577df53d3028e51e20450cadb931e86cac2055b40c315a56d9
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178bbcd230b3578cd203d8ae6c2add9e74bb7aeabb94d447a3f70919fc0e8241
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcc8dbed04c9f3746bd0fa583310ef7861dc750445ac3c5d408dc532e0ad467a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b125975f9fae8190f9270b46403bcee0bbce4a754ced1327bdd16ba81bfb9533
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2b3eacb375d6a673591c6a5df9c962d90dc8b2211d5552b0eeae2d8a3b7ae27
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:224b826add4bdc720485795d14278c83b0b542b6e36e3b8f6419da3fb4a5e74d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08319947868585587,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-10500",
4
- "epoch": 0.84,
5
  "eval_steps": 500,
6
- "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1645,6 +1645,84 @@
1645
  "eval_samples_per_second": 22.724,
1646
  "eval_steps_per_second": 5.681,
1647
  "step": 10500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1648
  }
1649
  ],
1650
  "logging_steps": 50,
@@ -1664,7 +1742,7 @@
1664
  "attributes": {}
1665
  }
1666
  },
1667
- "total_flos": 2.557623140352e+16,
1668
  "train_batch_size": 4,
1669
  "trial_name": null,
1670
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08301527053117752,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
4
+ "epoch": 0.88,
5
  "eval_steps": 500,
6
+ "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1645
  "eval_samples_per_second": 22.724,
1646
  "eval_steps_per_second": 5.681,
1647
  "step": 10500
1648
+ },
1649
+ {
1650
+ "epoch": 0.844,
1651
+ "grad_norm": 0.16710689663887024,
1652
+ "learning_rate": 1.7342400000000002e-05,
1653
+ "loss": 0.0616,
1654
+ "step": 10550
1655
+ },
1656
+ {
1657
+ "epoch": 0.848,
1658
+ "grad_norm": 0.2513883411884308,
1659
+ "learning_rate": 1.7282400000000002e-05,
1660
+ "loss": 0.0638,
1661
+ "step": 10600
1662
+ },
1663
+ {
1664
+ "epoch": 0.852,
1665
+ "grad_norm": 0.182452991604805,
1666
+ "learning_rate": 1.7222400000000003e-05,
1667
+ "loss": 0.0588,
1668
+ "step": 10650
1669
+ },
1670
+ {
1671
+ "epoch": 0.856,
1672
+ "grad_norm": 0.16390974819660187,
1673
+ "learning_rate": 1.71624e-05,
1674
+ "loss": 0.059,
1675
+ "step": 10700
1676
+ },
1677
+ {
1678
+ "epoch": 0.86,
1679
+ "grad_norm": 0.13543175160884857,
1680
+ "learning_rate": 1.71024e-05,
1681
+ "loss": 0.0648,
1682
+ "step": 10750
1683
+ },
1684
+ {
1685
+ "epoch": 0.864,
1686
+ "grad_norm": 0.12370152771472931,
1687
+ "learning_rate": 1.70424e-05,
1688
+ "loss": 0.0625,
1689
+ "step": 10800
1690
+ },
1691
+ {
1692
+ "epoch": 0.868,
1693
+ "grad_norm": 0.23662041127681732,
1694
+ "learning_rate": 1.6982400000000002e-05,
1695
+ "loss": 0.0667,
1696
+ "step": 10850
1697
+ },
1698
+ {
1699
+ "epoch": 0.872,
1700
+ "grad_norm": 0.1502092331647873,
1701
+ "learning_rate": 1.6922400000000003e-05,
1702
+ "loss": 0.0652,
1703
+ "step": 10900
1704
+ },
1705
+ {
1706
+ "epoch": 0.876,
1707
+ "grad_norm": 0.1745540052652359,
1708
+ "learning_rate": 1.68624e-05,
1709
+ "loss": 0.063,
1710
+ "step": 10950
1711
+ },
1712
+ {
1713
+ "epoch": 0.88,
1714
+ "grad_norm": 0.20543019473552704,
1715
+ "learning_rate": 1.68024e-05,
1716
+ "loss": 0.0605,
1717
+ "step": 11000
1718
+ },
1719
+ {
1720
+ "epoch": 0.88,
1721
+ "eval_loss": 0.08301527053117752,
1722
+ "eval_runtime": 88.0655,
1723
+ "eval_samples_per_second": 22.71,
1724
+ "eval_steps_per_second": 5.678,
1725
+ "step": 11000
1726
  }
1727
  ],
1728
  "logging_steps": 50,
 
1742
  "attributes": {}
1743
  }
1744
  },
1745
+ "total_flos": 2.679414718464e+16,
1746
  "train_batch_size": 4,
1747
  "trial_name": null,
1748
  "trial_params": null