irishprancer committed on
Commit
9d66bfd
·
verified ·
1 Parent(s): b32b8ee

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0df78e692defadb84568edca305adffd7b9529abb8c9b3b91b1f9ea47d1a21f5
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c160d59362d9998e9102b16a7c722555cc9a3b87d2a24bd56cfa2e1406fd59
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de73ad5b03551d4d5684644b29f657d82e4cbc9d8443f9f124c546c260bed71f
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b27372b9255f1e87b65fe832eb4ea027e48ccbfe3973c3339a67d4e77ed6bb
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ca9142fcbd976a2b9880762578e5776f18d9cad34016a627060f41ab78ec47d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5535f299045fe8f5c903e1fe89ea07c865852a623fd1578e00cbe2ef4b1aa5e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a615c97559beeeb761a113ec8f76cd5a30f01f1e374145e869eb8521acc6eaa9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e38fc7cb67eb5df095ca277e8495019522abe02c611f770970f2de2af01dea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7168284058570862,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 32.608695652173914,
5
  "eval_steps": 150,
6
- "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -732,6 +732,151 @@
732
  "eval_samples_per_second": 24.083,
733
  "eval_steps_per_second": 24.083,
734
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  }
736
  ],
737
  "logging_steps": 10,
@@ -751,7 +896,7 @@
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 1.9231766387195904e+16,
755
  "train_batch_size": 4,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7168284058570862,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 39.130434782608695,
5
  "eval_steps": 150,
6
+ "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
732
  "eval_samples_per_second": 24.083,
733
  "eval_steps_per_second": 24.083,
734
  "step": 750
735
+ },
736
+ {
737
+ "epoch": 33.04347826086956,
738
+ "grad_norm": 1.426059603691101,
739
+ "learning_rate": 1.5715375657489587e-06,
740
+ "loss": 0.4543,
741
+ "step": 760
742
+ },
743
+ {
744
+ "epoch": 33.47826086956522,
745
+ "grad_norm": 1.5478020906448364,
746
+ "learning_rate": 3.1430751314979174e-06,
747
+ "loss": 0.4608,
748
+ "step": 770
749
+ },
750
+ {
751
+ "epoch": 33.91304347826087,
752
+ "grad_norm": 1.2852143049240112,
753
+ "learning_rate": 4.714612697246876e-06,
754
+ "loss": 0.4359,
755
+ "step": 780
756
+ },
757
+ {
758
+ "epoch": 34.34782608695652,
759
+ "grad_norm": 1.3916325569152832,
760
+ "learning_rate": 6.286150262995835e-06,
761
+ "loss": 0.4366,
762
+ "step": 790
763
+ },
764
+ {
765
+ "epoch": 34.78260869565217,
766
+ "grad_norm": 1.4073759317398071,
767
+ "learning_rate": 7.857687828744793e-06,
768
+ "loss": 0.4145,
769
+ "step": 800
770
+ },
771
+ {
772
+ "epoch": 35.21739130434783,
773
+ "grad_norm": 1.258367896080017,
774
+ "learning_rate": 9.429225394493751e-06,
775
+ "loss": 0.5149,
776
+ "step": 810
777
+ },
778
+ {
779
+ "epoch": 35.65217391304348,
780
+ "grad_norm": 1.2376227378845215,
781
+ "learning_rate": 1.100076296024271e-05,
782
+ "loss": 0.4429,
783
+ "step": 820
784
+ },
785
+ {
786
+ "epoch": 36.08695652173913,
787
+ "grad_norm": 1.071475625038147,
788
+ "learning_rate": 1.257230052599167e-05,
789
+ "loss": 0.4587,
790
+ "step": 830
791
+ },
792
+ {
793
+ "epoch": 36.52173913043478,
794
+ "grad_norm": 1.109466552734375,
795
+ "learning_rate": 1.4143838091740628e-05,
796
+ "loss": 0.4244,
797
+ "step": 840
798
+ },
799
+ {
800
+ "epoch": 36.95652173913044,
801
+ "grad_norm": 1.3434367179870605,
802
+ "learning_rate": 1.5715375657489586e-05,
803
+ "loss": 0.4951,
804
+ "step": 850
805
+ },
806
+ {
807
+ "epoch": 37.391304347826086,
808
+ "grad_norm": 1.8077468872070312,
809
+ "learning_rate": 1.5715374100221386e-05,
810
+ "loss": 0.4675,
811
+ "step": 860
812
+ },
813
+ {
814
+ "epoch": 37.82608695652174,
815
+ "grad_norm": 1.2127968072891235,
816
+ "learning_rate": 1.5715369428417403e-05,
817
+ "loss": 0.4156,
818
+ "step": 870
819
+ },
820
+ {
821
+ "epoch": 38.26086956521739,
822
+ "grad_norm": 1.1871669292449951,
823
+ "learning_rate": 1.571536164207949e-05,
824
+ "loss": 0.4515,
825
+ "step": 880
826
+ },
827
+ {
828
+ "epoch": 38.69565217391305,
829
+ "grad_norm": 1.3719384670257568,
830
+ "learning_rate": 1.571535074121073e-05,
831
+ "loss": 0.4019,
832
+ "step": 890
833
+ },
834
+ {
835
+ "epoch": 39.130434782608695,
836
+ "grad_norm": 1.8886760473251343,
837
+ "learning_rate": 1.5715336725815448e-05,
838
+ "loss": 0.5218,
839
+ "step": 900
840
+ },
841
+ {
842
+ "epoch": 39.130434782608695,
843
+ "eval_loss": 0.7287566661834717,
844
+ "eval_runtime": 0.5312,
845
+ "eval_samples_per_second": 18.825,
846
+ "eval_steps_per_second": 18.825,
847
+ "step": 900
848
+ },
849
+ {
850
+ "epoch": 39.130434782608695,
851
+ "eval_loss": 0.8616224527359009,
852
+ "eval_runtime": 0.4551,
853
+ "eval_samples_per_second": 21.972,
854
+ "eval_steps_per_second": 21.972,
855
+ "step": 900
856
+ },
857
+ {
858
+ "epoch": 39.130434782608695,
859
+ "eval_loss": 0.7287566661834717,
860
+ "eval_runtime": 0.4445,
861
+ "eval_samples_per_second": 22.5,
862
+ "eval_steps_per_second": 22.5,
863
+ "step": 900
864
+ },
865
+ {
866
+ "epoch": 39.130434782608695,
867
+ "eval_loss": 0.7235647439956665,
868
+ "eval_runtime": 0.4458,
869
+ "eval_samples_per_second": 22.434,
870
+ "eval_steps_per_second": 22.434,
871
+ "step": 900
872
+ },
873
+ {
874
+ "epoch": 39.130434782608695,
875
+ "eval_loss": 0.8611491322517395,
876
+ "eval_runtime": 0.4432,
877
+ "eval_samples_per_second": 22.563,
878
+ "eval_steps_per_second": 22.563,
879
+ "step": 900
880
  }
881
  ],
882
  "logging_steps": 10,
 
896
  "attributes": {}
897
  }
898
  },
899
+ "total_flos": 2.3078822838165504e+16,
900
  "train_batch_size": 4,
901
  "trial_name": null,
902
  "trial_params": null