besimray commited on
Commit
1ded53e
·
verified ·
1 Parent(s): 759bbf9

Training in progress, step 210, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fccb9d19391a43ae36da2a29dca1396279a90010220baf3fdc58dd3a829f12c
3
  size 67662840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d259edcf51a6e65a5e5aa8f076d5bc4bf480fc4b4c59350991263774074d7ea
3
  size 67662840
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27344c3587987f033a2e10256a7429eafce2d23bd1d8359720449b41078b595b
3
  size 34607610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0503c8ba76bfe0abdcb3e8a6104759013bd2d60c838b4625f0a1ddcf7615226
3
  size 34607610
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4869346972db0176eb12ec757dcc0880dcf0f71294066b4e62ba14aee601ed
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bfc5fa47af4dae874a1be827d0f45774971f451a821e11602842d4ee93aaa71
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:194047adc90239b1536a6d8862bfb9a2319ff9742b72adc68dd878597757d341
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29055dd59dc6fec528a1dd0a8f1388fe1bcd85af7ce5330f9713cff07d4913e7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.456692913385827,
5
  "eval_steps": 8,
6
- "global_step": 205,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1650,6 +1650,49 @@
1650
  "learning_rate": 4.5379898746037804e-05,
1651
  "loss": 0.8624,
1652
  "step": 205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1653
  }
1654
  ],
1655
  "logging_steps": 1,
@@ -1669,7 +1712,7 @@
1669
  "attributes": {}
1670
  }
1671
  },
1672
- "total_flos": 3.3305746833408e+16,
1673
  "train_batch_size": 15,
1674
  "trial_name": null,
1675
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.6141732283464565,
5
  "eval_steps": 8,
6
+ "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1650
  "learning_rate": 4.5379898746037804e-05,
1651
  "loss": 0.8624,
1652
  "step": 205
1653
+ },
1654
+ {
1655
+ "epoch": 6.488188976377953,
1656
+ "grad_norm": 1.210954189300537,
1657
+ "learning_rate": 4.529436096395156e-05,
1658
+ "loss": 0.9547,
1659
+ "step": 206
1660
+ },
1661
+ {
1662
+ "epoch": 6.519685039370079,
1663
+ "grad_norm": 1.2978620529174805,
1664
+ "learning_rate": 4.520812077077604e-05,
1665
+ "loss": 0.8868,
1666
+ "step": 207
1667
+ },
1668
+ {
1669
+ "epoch": 6.551181102362205,
1670
+ "grad_norm": 1.081527590751648,
1671
+ "learning_rate": 4.5121181151383143e-05,
1672
+ "loss": 0.7841,
1673
+ "step": 208
1674
+ },
1675
+ {
1676
+ "epoch": 6.551181102362205,
1677
+ "eval_loss": 1.2477138042449951,
1678
+ "eval_runtime": 3.8991,
1679
+ "eval_samples_per_second": 25.647,
1680
+ "eval_steps_per_second": 1.795,
1681
+ "step": 208
1682
+ },
1683
+ {
1684
+ "epoch": 6.582677165354331,
1685
+ "grad_norm": 1.1496070623397827,
1686
+ "learning_rate": 4.503354511485273e-05,
1687
+ "loss": 0.9487,
1688
+ "step": 209
1689
+ },
1690
+ {
1691
+ "epoch": 6.6141732283464565,
1692
+ "grad_norm": 1.188530683517456,
1693
+ "learning_rate": 4.494521569436845e-05,
1694
+ "loss": 0.9439,
1695
+ "step": 210
1696
  }
1697
  ],
1698
  "logging_steps": 1,
 
1712
  "attributes": {}
1713
  }
1714
  },
1715
+ "total_flos": 3.4155451121664e+16,
1716
  "train_batch_size": 15,
1717
  "trial_name": null,
1718
  "trial_params": null