jflotz commited on
Commit
7011cca
·
1 Parent(s): 0c72fd3

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d995b453b0a04bbd02c89c1e2a0b01d5ea8104964b6e575d18b8b2b852f0697a
3
- size 893440453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a48a0d3e06f6d9c4dbeec9dd5221abf842041a606411a3eb4c011b2b6ce157
3
+ size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72110dc5fec392448defa8c45f957d936420ff0753177d0b17b355c59e5f2deb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91ea0abf3868a060d7ed4c0f943d81a8c31c59f1a4fbcac20e4c228f3d5306b
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0f5f3d7fc7de25a1729f017d3d6dc7b79407bb23fd390423966864300b65a09
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc4e58968df8d8de7b76afb49e34b862e7070d202e80b8a826f72b6f635c2cde
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a3a210506f64342e553d4bf48e6fadb6effff0f5dbb7697fdabf52d12772ab8
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594120123fbd1fad56e4287c2d069c836d6ed898e13f3bf2eb5ae1995a6abf18
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0e2297daa1dc20f8b8863f55ad4770a767d9eba0c8a71f314c37ad1c3871cd7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2f001850d67d834ad6f5341284dffa53108ee6ecb8b9fb6837cfd65ff83d24
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c8ca75222d5b686b4dcb8add7e5e6c8e4bb0aa54a48b6c9650ff1b0ab1d9de6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e245827ca7b63a97e06cc9b0093c1d74b595472c8bc308d4290564e966646b17
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee0fae1242e86663273cf365a3c63b1a3921cad2d2396be93b9c702039cd5d4a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed21c5a14597e7b8ab428400d75245e02257e563d47635efe05355069d0b1928
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78f998ac055776992254d4db1276dbf13aa8f00ccf4471740b472226d6ea583d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01cbc2bdac8629204109c016e1ffa5b5eef157a33d3511366f9e2f7031a7ee33
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:069cccc1e69b521d4ca315007273f764ce63546923ca059673907772b710280e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2cc8ec76a8c7fadc25b1ad07a0e21e6f5b2945d2efe03f93181052bc36951e
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d375627d89cc809d43a8a304898ce0f67be3ee70ad132436bc25574b87fe6663
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7f27abef02152cc0e2907407c48c40e2c6425985f5175919eb4dcfb4c58016
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8747acdc6c067a30e87070a5c0421cb4d690fbe00d316895f653b3a392344a3f
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe2d09bf8807c63a805e572e94358dda9d6462e44109f7e340ab10774501127
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.11994290717618414,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -726,11 +726,131 @@
726
  "learning_rate": 0.00014996172801386482,
727
  "loss": 0.4117,
728
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  }
730
  ],
731
  "max_steps": 1000000,
732
  "num_train_epochs": 2,
733
- "total_flos": 4.0564501264363663e+21,
734
  "trial_name": null,
735
  "trial_params": null
736
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.13993339170554817,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
726
  "learning_rate": 0.00014996172801386482,
727
  "loss": 0.4117,
728
  "step": 60000
729
+ },
730
+ {
731
+ "epoch": 0.12,
732
+ "learning_rate": 0.00014995780552943551,
733
+ "loss": 0.4106,
734
+ "step": 60500
735
+ },
736
+ {
737
+ "epoch": 0.12,
738
+ "learning_rate": 0.00014995369178303722,
739
+ "loss": 0.4098,
740
+ "step": 61000
741
+ },
742
+ {
743
+ "epoch": 0.12,
744
+ "learning_rate": 0.0001499493867859168,
745
+ "loss": 0.4095,
746
+ "step": 61500
747
+ },
748
+ {
749
+ "epoch": 0.12,
750
+ "learning_rate": 0.0001499448905498439,
751
+ "loss": 0.4081,
752
+ "step": 62000
753
+ },
754
+ {
755
+ "epoch": 0.12,
756
+ "learning_rate": 0.00014994020308711106,
757
+ "loss": 0.408,
758
+ "step": 62500
759
+ },
760
+ {
761
+ "epoch": 0.13,
762
+ "learning_rate": 0.00014993532441053364,
763
+ "loss": 0.4074,
764
+ "step": 63000
765
+ },
766
+ {
767
+ "epoch": 0.13,
768
+ "learning_rate": 0.0001499302545334498,
769
+ "loss": 0.4066,
770
+ "step": 63500
771
+ },
772
+ {
773
+ "epoch": 0.13,
774
+ "learning_rate": 0.0001499249934697203,
775
+ "loss": 0.4055,
776
+ "step": 64000
777
+ },
778
+ {
779
+ "epoch": 0.13,
780
+ "learning_rate": 0.00014991954123372875,
781
+ "loss": 0.4049,
782
+ "step": 64500
783
+ },
784
+ {
785
+ "epoch": 0.13,
786
+ "learning_rate": 0.0001499138978403813,
787
+ "loss": 0.4038,
788
+ "step": 65000
789
+ },
790
+ {
791
+ "epoch": 0.13,
792
+ "learning_rate": 0.00014990806330510687,
793
+ "loss": 0.4039,
794
+ "step": 65500
795
+ },
796
+ {
797
+ "epoch": 0.13,
798
+ "learning_rate": 0.00014990203764385677,
799
+ "loss": 0.4029,
800
+ "step": 66000
801
+ },
802
+ {
803
+ "epoch": 0.13,
804
+ "learning_rate": 0.00014989582087310494,
805
+ "loss": 0.4028,
806
+ "step": 66500
807
+ },
808
+ {
809
+ "epoch": 0.13,
810
+ "learning_rate": 0.00014988941300984784,
811
+ "loss": 0.4022,
812
+ "step": 67000
813
+ },
814
+ {
815
+ "epoch": 0.13,
816
+ "learning_rate": 0.00014988281407160426,
817
+ "loss": 0.4013,
818
+ "step": 67500
819
+ },
820
+ {
821
+ "epoch": 0.14,
822
+ "learning_rate": 0.0001498760240764155,
823
+ "loss": 0.4003,
824
+ "step": 68000
825
+ },
826
+ {
827
+ "epoch": 0.14,
828
+ "learning_rate": 0.00014986904304284512,
829
+ "loss": 0.3996,
830
+ "step": 68500
831
+ },
832
+ {
833
+ "epoch": 0.14,
834
+ "learning_rate": 0.000149861870989979,
835
+ "loss": 0.3994,
836
+ "step": 69000
837
+ },
838
+ {
839
+ "epoch": 0.14,
840
+ "learning_rate": 0.00014985450793742527,
841
+ "loss": 0.399,
842
+ "step": 69500
843
+ },
844
+ {
845
+ "epoch": 0.14,
846
+ "learning_rate": 0.0001498469539053142,
847
+ "loss": 0.3993,
848
+ "step": 70000
849
  }
850
  ],
851
  "max_steps": 1000000,
852
  "num_train_epochs": 2,
853
+ "total_flos": 4.73259935611438e+21,
854
  "trial_name": null,
855
  "trial_params": null
856
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72110dc5fec392448defa8c45f957d936420ff0753177d0b17b355c59e5f2deb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c91ea0abf3868a060d7ed4c0f943d81a8c31c59f1a4fbcac20e4c228f3d5306b
3
  size 449450757