jflotz committed on
Commit
1d53b9f
·
1 Parent(s): 9f854bb

Training in progress, step 650000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:570fd9971dd127676195908f1f0168c560d379e06053db1ec1c6889a24e76909
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0094043a45ee455c34cdbf7e5ed868b844e2cc109c62c31adc8eabe0945cd55
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:069c7b8d28935c1bdaf707018f31232b5c5d0b17ca264ac835e0cab62f47f60b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afdb75bc9c4b3b8d3f36f77e21f0d34f0633a3fe673f092dd264b1121465456
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acfb6c5ca1e2a8aae6849b592c5e4c4b839246ca00f42f46fa8da24fee6f7051
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334f507bebbd8e2eb32a3a52e1460054ef235aff9b388a6044a2cf6124700604
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed15d29f5bdaa33109b0c66a9aa2dbc57339a469e3f71f40bec5ec342e0d6d49
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1fe6196ed311cd8ddb4f7739bbce785a7482bd7a8a89fc83aadbb7b199e0b80
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:962d2f5974f30660f555e22cdf0c12b334de1b8fc49a6a5192e63c3a6ee6eebe
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c16cf0e46ab235f9e251974c64ca93772ae50300b4f1505ff50d8f4e2246708
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa2e736f0ce5f395a825ebebcf342c762745706534807a9b43b2a6a713704726
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea98aa1b40d4dde89de24ce301ffc44f0dee70fb25e51e28dfe6b65e5e6240d
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bff10bd0517565104b7a365f7830fc50ca6a2c535ddf94460fc2737ad38c9a7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476caa126ce91db0ad93d8541266aa7e5c1a71c0473ab678864fc300fdd08e70
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24fffbd4923bc1f675f8117f531217edf35f82264eb436b97401dab9e4eeeaa0
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3637d3be8f9c8d6ff9f1958a71fe2d848eaecb87ddf0683d13eaae5352425491
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d716524e67d0b69cb1b5ffc99aa56ed5a73b186c4b6bfbd6ff0ef38267147113
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62727907f78fc16c3f0e4b91fbdcc94bc537750512333e674d8d2c4dcd12411
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03a35091ba68234fa026466686321e8ce53cfe05ba57973184932ffc7464e369
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb24dd41ced00dc957e38e97b930833d2e52e5141588b4ec2f84d6e2ee23293d
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2793910098792973,
5
- "global_step": 640000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7686,11 +7686,131 @@
7686
  "learning_rate": 5.401619257572453e-05,
7687
  "loss": 0.3007,
7688
  "step": 640000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7689
  }
7690
  ],
7691
  "max_steps": 1000000,
7692
  "num_train_epochs": 2,
7693
- "total_flos": 4.326855661422541e+22,
7694
  "trial_name": null,
7695
  "trial_params": null
7696
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2993814944086615,
5
+ "global_step": 650000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7686
  "learning_rate": 5.401619257572453e-05,
7687
  "loss": 0.3007,
7688
  "step": 640000
7689
+ },
7690
+ {
7691
+ "epoch": 1.28,
7692
+ "learning_rate": 5.390875461869379e-05,
7693
+ "loss": 0.3011,
7694
+ "step": 640500
7695
+ },
7696
+ {
7697
+ "epoch": 1.28,
7698
+ "learning_rate": 5.3801387994131576e-05,
7699
+ "loss": 0.3012,
7700
+ "step": 641000
7701
+ },
7702
+ {
7703
+ "epoch": 1.28,
7704
+ "learning_rate": 5.36940929955742e-05,
7705
+ "loss": 0.3011,
7706
+ "step": 641500
7707
+ },
7708
+ {
7709
+ "epoch": 1.28,
7710
+ "learning_rate": 5.358686991636209e-05,
7711
+ "loss": 0.3005,
7712
+ "step": 642000
7713
+ },
7714
+ {
7715
+ "epoch": 1.28,
7716
+ "learning_rate": 5.347971904963904e-05,
7717
+ "loss": 0.3004,
7718
+ "step": 642500
7719
+ },
7720
+ {
7721
+ "epoch": 1.29,
7722
+ "learning_rate": 5.3372640688351476e-05,
7723
+ "loss": 0.3002,
7724
+ "step": 643000
7725
+ },
7726
+ {
7727
+ "epoch": 1.29,
7728
+ "learning_rate": 5.326563512524748e-05,
7729
+ "loss": 0.3005,
7730
+ "step": 643500
7731
+ },
7732
+ {
7733
+ "epoch": 1.29,
7734
+ "learning_rate": 5.315870265287618e-05,
7735
+ "loss": 0.2999,
7736
+ "step": 644000
7737
+ },
7738
+ {
7739
+ "epoch": 1.29,
7740
+ "learning_rate": 5.3051843563586914e-05,
7741
+ "loss": 0.3004,
7742
+ "step": 644500
7743
+ },
7744
+ {
7745
+ "epoch": 1.29,
7746
+ "learning_rate": 5.294505814952835e-05,
7747
+ "loss": 0.3003,
7748
+ "step": 645000
7749
+ },
7750
+ {
7751
+ "epoch": 1.29,
7752
+ "learning_rate": 5.28383467026477e-05,
7753
+ "loss": 0.2999,
7754
+ "step": 645500
7755
+ },
7756
+ {
7757
+ "epoch": 1.29,
7758
+ "learning_rate": 5.2731709514689995e-05,
7759
+ "loss": 0.2997,
7760
+ "step": 646000
7761
+ },
7762
+ {
7763
+ "epoch": 1.29,
7764
+ "learning_rate": 5.262514687719722e-05,
7765
+ "loss": 0.2999,
7766
+ "step": 646500
7767
+ },
7768
+ {
7769
+ "epoch": 1.29,
7770
+ "learning_rate": 5.25186590815076e-05,
7771
+ "loss": 0.3007,
7772
+ "step": 647000
7773
+ },
7774
+ {
7775
+ "epoch": 1.29,
7776
+ "learning_rate": 5.24122464187547e-05,
7777
+ "loss": 0.3007,
7778
+ "step": 647500
7779
+ },
7780
+ {
7781
+ "epoch": 1.3,
7782
+ "learning_rate": 5.2305909179866635e-05,
7783
+ "loss": 0.3002,
7784
+ "step": 648000
7785
+ },
7786
+ {
7787
+ "epoch": 1.3,
7788
+ "learning_rate": 5.219964765556536e-05,
7789
+ "loss": 0.3003,
7790
+ "step": 648500
7791
+ },
7792
+ {
7793
+ "epoch": 1.3,
7794
+ "learning_rate": 5.209346213636584e-05,
7795
+ "loss": 0.2997,
7796
+ "step": 649000
7797
+ },
7798
+ {
7799
+ "epoch": 1.3,
7800
+ "learning_rate": 5.1987352912575244e-05,
7801
+ "loss": 0.2995,
7802
+ "step": 649500
7803
+ },
7804
+ {
7805
+ "epoch": 1.3,
7806
+ "learning_rate": 5.188132027429215e-05,
7807
+ "loss": 0.2991,
7808
+ "step": 650000
7809
  }
7810
  ],
7811
  "max_steps": 1000000,
7812
  "num_train_epochs": 2,
7813
+ "total_flos": 4.394457756168279e+22,
7814
  "trial_name": null,
7815
  "trial_params": null
7816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
3
  size 449450757