jflotz commited on
Commit
ae93723
·
1 Parent(s): d8879f0

Training in progress, step 790000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f7748e8c1a812c7750cfe501e1201374a5c3fe0aa18d46f95d5f2b333fc8c81
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee6d0b8731469184859b6e2af2323dc331e9f7e709ceb8418eca6fab2f75e9cb
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c535b39652a41a9bd80472f25014719ca46117003764349c033c0d865a69629
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4647c95708e00e5dd99d1b79f20ca15f89d2d2a7bd2cde7a42a8d199abac73fd
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c067c2b3d8b4465df473fc8f38cbbeff61d1e95141d65c9e7d5985b861950165
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83d1297302d20060e31d476195b98906c23904815e65152eb2d3ffb7dd074183
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.342103208681396,
5
- "global_step": 780000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -15606,11 +15606,211 @@
15606
  "eval_samples_per_second": 840.721,
15607
  "eval_steps_per_second": 13.176,
15608
  "step": 780000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15609
  }
15610
  ],
15611
  "max_steps": 1000000,
15612
  "num_train_epochs": 12,
15613
- "total_flos": 5.467776548786952e+22,
15614
  "trial_name": null,
15615
  "trial_params": null
15616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.453631932904319,
5
+ "global_step": 790000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
15606
  "eval_samples_per_second": 840.721,
15607
  "eval_steps_per_second": 13.176,
15608
  "step": 780000
15609
+ },
15610
+ {
15611
+ "epoch": 8.35,
15612
+ "learning_rate": 2.7645596244470935e-05,
15613
+ "loss": 0.1885,
15614
+ "step": 780500
15615
+ },
15616
+ {
15617
+ "epoch": 8.35,
15618
+ "learning_rate": 2.7568839012773365e-05,
15619
+ "loss": 0.1884,
15620
+ "step": 781000
15621
+ },
15622
+ {
15623
+ "epoch": 8.35,
15624
+ "eval_loss": 0.1805545538663864,
15625
+ "eval_runtime": 2.6452,
15626
+ "eval_samples_per_second": 868.354,
15627
+ "eval_steps_per_second": 13.609,
15628
+ "step": 781000
15629
+ },
15630
+ {
15631
+ "epoch": 8.36,
15632
+ "learning_rate": 2.7492225125867825e-05,
15633
+ "loss": 0.1889,
15634
+ "step": 781500
15635
+ },
15636
+ {
15637
+ "epoch": 8.36,
15638
+ "learning_rate": 2.7415754793213826e-05,
15639
+ "loss": 0.1886,
15640
+ "step": 782000
15641
+ },
15642
+ {
15643
+ "epoch": 8.36,
15644
+ "eval_loss": 0.1767302304506302,
15645
+ "eval_runtime": 2.6579,
15646
+ "eval_samples_per_second": 864.226,
15647
+ "eval_steps_per_second": 13.545,
15648
+ "step": 782000
15649
+ },
15650
+ {
15651
+ "epoch": 8.37,
15652
+ "learning_rate": 2.7339428223878283e-05,
15653
+ "loss": 0.1883,
15654
+ "step": 782500
15655
+ },
15656
+ {
15657
+ "epoch": 8.38,
15658
+ "learning_rate": 2.7263245626535116e-05,
15659
+ "loss": 0.1884,
15660
+ "step": 783000
15661
+ },
15662
+ {
15663
+ "epoch": 8.38,
15664
+ "eval_loss": 0.1802656203508377,
15665
+ "eval_runtime": 2.7131,
15666
+ "eval_samples_per_second": 846.621,
15667
+ "eval_steps_per_second": 13.269,
15668
+ "step": 783000
15669
+ },
15670
+ {
15671
+ "epoch": 8.38,
15672
+ "learning_rate": 2.7187207209464687e-05,
15673
+ "loss": 0.1883,
15674
+ "step": 783500
15675
+ },
15676
+ {
15677
+ "epoch": 8.39,
15678
+ "learning_rate": 2.7111313180553077e-05,
15679
+ "loss": 0.1882,
15680
+ "step": 784000
15681
+ },
15682
+ {
15683
+ "epoch": 8.39,
15684
+ "eval_loss": 0.17976997792720795,
15685
+ "eval_runtime": 2.668,
15686
+ "eval_samples_per_second": 860.932,
15687
+ "eval_steps_per_second": 13.493,
15688
+ "step": 784000
15689
+ },
15690
+ {
15691
+ "epoch": 8.39,
15692
+ "learning_rate": 2.703556374729169e-05,
15693
+ "loss": 0.1885,
15694
+ "step": 784500
15695
+ },
15696
+ {
15697
+ "epoch": 8.4,
15698
+ "learning_rate": 2.6959959116776587e-05,
15699
+ "loss": 0.188,
15700
+ "step": 785000
15701
+ },
15702
+ {
15703
+ "epoch": 8.4,
15704
+ "eval_loss": 0.1783231794834137,
15705
+ "eval_runtime": 2.6459,
15706
+ "eval_samples_per_second": 868.123,
15707
+ "eval_steps_per_second": 13.606,
15708
+ "step": 785000
15709
+ },
15710
+ {
15711
+ "epoch": 8.4,
15712
+ "learning_rate": 2.68844994957079e-05,
15713
+ "loss": 0.1881,
15714
+ "step": 785500
15715
+ },
15716
+ {
15717
+ "epoch": 8.41,
15718
+ "learning_rate": 2.6809185090389406e-05,
15719
+ "loss": 0.1884,
15720
+ "step": 786000
15721
+ },
15722
+ {
15723
+ "epoch": 8.41,
15724
+ "eval_loss": 0.18017184734344482,
15725
+ "eval_runtime": 2.6671,
15726
+ "eval_samples_per_second": 861.233,
15727
+ "eval_steps_per_second": 13.498,
15728
+ "step": 786000
15729
+ },
15730
+ {
15731
+ "epoch": 8.41,
15732
+ "learning_rate": 2.6734016106727777e-05,
15733
+ "loss": 0.1881,
15734
+ "step": 786500
15735
+ },
15736
+ {
15737
+ "epoch": 8.42,
15738
+ "learning_rate": 2.6658992750232167e-05,
15739
+ "loss": 0.188,
15740
+ "step": 787000
15741
+ },
15742
+ {
15743
+ "epoch": 8.42,
15744
+ "eval_loss": 0.17710144817829132,
15745
+ "eval_runtime": 2.6667,
15746
+ "eval_samples_per_second": 861.367,
15747
+ "eval_steps_per_second": 13.5,
15748
+ "step": 787000
15749
+ },
15750
+ {
15751
+ "epoch": 8.43,
15752
+ "learning_rate": 2.6584115226013553e-05,
15753
+ "loss": 0.1883,
15754
+ "step": 787500
15755
+ },
15756
+ {
15757
+ "epoch": 8.43,
15758
+ "learning_rate": 2.6509383738784218e-05,
15759
+ "loss": 0.188,
15760
+ "step": 788000
15761
+ },
15762
+ {
15763
+ "epoch": 8.43,
15764
+ "eval_loss": 0.1786525398492813,
15765
+ "eval_runtime": 2.6579,
15766
+ "eval_samples_per_second": 864.232,
15767
+ "eval_steps_per_second": 13.545,
15768
+ "step": 788000
15769
+ },
15770
+ {
15771
+ "epoch": 8.44,
15772
+ "learning_rate": 2.6434798492857228e-05,
15773
+ "loss": 0.1881,
15774
+ "step": 788500
15775
+ },
15776
+ {
15777
+ "epoch": 8.44,
15778
+ "learning_rate": 2.6360359692145757e-05,
15779
+ "loss": 0.1882,
15780
+ "step": 789000
15781
+ },
15782
+ {
15783
+ "epoch": 8.44,
15784
+ "eval_loss": 0.17897970974445343,
15785
+ "eval_runtime": 2.6253,
15786
+ "eval_samples_per_second": 874.933,
15787
+ "eval_steps_per_second": 13.712,
15788
+ "step": 789000
15789
+ },
15790
+ {
15791
+ "epoch": 8.45,
15792
+ "learning_rate": 2.6286067540162677e-05,
15793
+ "loss": 0.1882,
15794
+ "step": 789500
15795
+ },
15796
+ {
15797
+ "epoch": 8.45,
15798
+ "learning_rate": 2.6211922240019883e-05,
15799
+ "loss": 0.1883,
15800
+ "step": 790000
15801
+ },
15802
+ {
15803
+ "epoch": 8.45,
15804
+ "eval_loss": 0.17872017621994019,
15805
+ "eval_runtime": 2.5868,
15806
+ "eval_samples_per_second": 887.972,
15807
+ "eval_steps_per_second": 13.917,
15808
+ "step": 790000
15809
  }
15810
  ],
15811
  "max_steps": 1000000,
15812
  "num_train_epochs": 12,
15813
+ "total_flos": 5.5378768098172995e+22,
15814
  "trial_name": null,
15815
  "trial_params": null
15816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c535b39652a41a9bd80472f25014719ca46117003764349c033c0d865a69629
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
3
  size 449471589