ljcamargo commited on
Commit
c371e4b
·
verified ·
1 Parent(s): e397577

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3daa573e6c713aaba8eac212b616972e27300562149c5a86cf161a20a2986ab7
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748811954799fb8207c027f9b41646692c79d79091f80ac4e270ed4cc9d5b86d
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb57a7b1bc6df393a4d72695c3003da6933e5235a702cfb45b794fb4c7477bb
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b67e674b05b252f5f41be162214aad1c332bd589e7f1ad1cc4ce742dbb1b2122
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db5e98494f4088139d10e50b166d461cb7e004fb4e01c00728b39a7d5b780d91
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb7c2d8a4305600ccebb18dd04e9a0785ea448686f24e449566e93d850fb957
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2e9f05b210aeadfc93d7bb7ed80d64988a89f8306f57d2f4dda99778443c8e5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c7d122b19d10b5d68d81c5fb7ac1f07cbefaf32391619951f39bd376dd10e0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.96,
6
  "eval_steps": 500,
7
- "global_step": 2400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1688,6 +1688,76 @@
1688
  "learning_rate": 2.1101286173633444e-06,
1689
  "loss": 0.2135,
1690
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1691
  }
1692
  ],
1693
  "logging_steps": 10,
@@ -1702,12 +1772,12 @@
1702
  "should_evaluate": false,
1703
  "should_log": false,
1704
  "should_save": true,
1705
- "should_training_stop": false
1706
  },
1707
  "attributes": {}
1708
  }
1709
  },
1710
- "total_flos": 4.334241751385702e+16,
1711
  "train_batch_size": 2,
1712
  "trial_name": null,
1713
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1688
  "learning_rate": 2.1101286173633444e-06,
1689
  "loss": 0.2135,
1690
  "step": 2400
1691
+ },
1692
+ {
1693
+ "epoch": 0.964,
1694
+ "grad_norm": 4.838590621948242,
1695
+ "learning_rate": 1.909163987138264e-06,
1696
+ "loss": 0.3181,
1697
+ "step": 2410
1698
+ },
1699
+ {
1700
+ "epoch": 0.968,
1701
+ "grad_norm": 6.775151252746582,
1702
+ "learning_rate": 1.7081993569131833e-06,
1703
+ "loss": 0.2516,
1704
+ "step": 2420
1705
+ },
1706
+ {
1707
+ "epoch": 0.972,
1708
+ "grad_norm": 13.491608619689941,
1709
+ "learning_rate": 1.507234726688103e-06,
1710
+ "loss": 0.2294,
1711
+ "step": 2430
1712
+ },
1713
+ {
1714
+ "epoch": 0.976,
1715
+ "grad_norm": 7.067889213562012,
1716
+ "learning_rate": 1.3062700964630226e-06,
1717
+ "loss": 0.2206,
1718
+ "step": 2440
1719
+ },
1720
+ {
1721
+ "epoch": 0.98,
1722
+ "grad_norm": 6.7473530769348145,
1723
+ "learning_rate": 1.1053054662379423e-06,
1724
+ "loss": 0.2003,
1725
+ "step": 2450
1726
+ },
1727
+ {
1728
+ "epoch": 0.984,
1729
+ "grad_norm": 9.074109077453613,
1730
+ "learning_rate": 9.043408360128617e-07,
1731
+ "loss": 0.2343,
1732
+ "step": 2460
1733
+ },
1734
+ {
1735
+ "epoch": 0.988,
1736
+ "grad_norm": 10.73699951171875,
1737
+ "learning_rate": 7.033762057877814e-07,
1738
+ "loss": 0.6721,
1739
+ "step": 2470
1740
+ },
1741
+ {
1742
+ "epoch": 0.992,
1743
+ "grad_norm": 5.29847526550293,
1744
+ "learning_rate": 5.02411575562701e-07,
1745
+ "loss": 0.236,
1746
+ "step": 2480
1747
+ },
1748
+ {
1749
+ "epoch": 0.996,
1750
+ "grad_norm": 10.172593116760254,
1751
+ "learning_rate": 3.014469453376206e-07,
1752
+ "loss": 0.216,
1753
+ "step": 2490
1754
+ },
1755
+ {
1756
+ "epoch": 1.0,
1757
+ "grad_norm": 16.507993698120117,
1758
+ "learning_rate": 1.0048231511254019e-07,
1759
+ "loss": 0.2836,
1760
+ "step": 2500
1761
  }
1762
  ],
1763
  "logging_steps": 10,
 
1772
  "should_evaluate": false,
1773
  "should_log": false,
1774
  "should_save": true,
1775
+ "should_training_stop": true
1776
  },
1777
  "attributes": {}
1778
  }
1779
  },
1780
+ "total_flos": 4.513321594098893e+16,
1781
  "train_batch_size": 2,
1782
  "trial_name": null,
1783
  "trial_params": null