irishprancer commited on
Commit
600fbe3
·
verified ·
1 Parent(s): 7255d85

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c199b9e3b512305005f55eb68da324ab293b3b8950e1f5a272d3930921d33d9b
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a27a7c2ec6b774731f6aa003dbe657cf0f987fe25d734da3afab88baa02c39d
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b022a2a2d53b6a44873d81b2a118fb3a243719db4c3693bec47989187fe8332
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878dd48bd56764e8178656b692092a26f8627ce77d11d5f9b0b03561a5b8d514
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ca9142fcbd976a2b9880762578e5776f18d9cad34016a627060f41ab78ec47d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5535f299045fe8f5c903e1fe89ea07c865852a623fd1578e00cbe2ef4b1aa5e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1436fdc963b029d14915fc9410fa4da5908bac7c909f67f4bfe1e0807f32ecef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6933cf40adb222251144f8b6059d0312a60442b27edead7b2d2d4edd50c35f13
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7167752981185913,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 32.608695652173914,
5
  "eval_steps": 150,
6
- "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -732,6 +732,151 @@
732
  "eval_samples_per_second": 25.574,
733
  "eval_steps_per_second": 25.574,
734
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  }
736
  ],
737
  "logging_steps": 10,
@@ -751,7 +896,7 @@
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 1.9231766387195904e+16,
755
  "train_batch_size": 4,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7167752981185913,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 39.130434782608695,
5
  "eval_steps": 150,
6
+ "global_step": 900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
732
  "eval_samples_per_second": 25.574,
733
  "eval_steps_per_second": 25.574,
734
  "step": 750
735
+ },
736
+ {
737
+ "epoch": 33.04347826086956,
738
+ "grad_norm": 1.426628589630127,
739
+ "learning_rate": 1.5787846818616295e-06,
740
+ "loss": 0.4544,
741
+ "step": 760
742
+ },
743
+ {
744
+ "epoch": 33.47826086956522,
745
+ "grad_norm": 1.549428105354309,
746
+ "learning_rate": 3.157569363723259e-06,
747
+ "loss": 0.461,
748
+ "step": 770
749
+ },
750
+ {
751
+ "epoch": 33.91304347826087,
752
+ "grad_norm": 1.2880816459655762,
753
+ "learning_rate": 4.736354045584888e-06,
754
+ "loss": 0.4361,
755
+ "step": 780
756
+ },
757
+ {
758
+ "epoch": 34.34782608695652,
759
+ "grad_norm": 1.3904523849487305,
760
+ "learning_rate": 6.315138727446518e-06,
761
+ "loss": 0.4367,
762
+ "step": 790
763
+ },
764
+ {
765
+ "epoch": 34.78260869565217,
766
+ "grad_norm": 1.4110171794891357,
767
+ "learning_rate": 7.893923409308147e-06,
768
+ "loss": 0.4146,
769
+ "step": 800
770
+ },
771
+ {
772
+ "epoch": 35.21739130434783,
773
+ "grad_norm": 1.2572520971298218,
774
+ "learning_rate": 9.472708091169777e-06,
775
+ "loss": 0.515,
776
+ "step": 810
777
+ },
778
+ {
779
+ "epoch": 35.65217391304348,
780
+ "grad_norm": 1.2433037757873535,
781
+ "learning_rate": 1.1051492773031405e-05,
782
+ "loss": 0.4429,
783
+ "step": 820
784
+ },
785
+ {
786
+ "epoch": 36.08695652173913,
787
+ "grad_norm": 1.068438172340393,
788
+ "learning_rate": 1.2630277454893036e-05,
789
+ "loss": 0.4585,
790
+ "step": 830
791
+ },
792
+ {
793
+ "epoch": 36.52173913043478,
794
+ "grad_norm": 1.1094129085540771,
795
+ "learning_rate": 1.4209062136754664e-05,
796
+ "loss": 0.4247,
797
+ "step": 840
798
+ },
799
+ {
800
+ "epoch": 36.95652173913044,
801
+ "grad_norm": 1.3411928415298462,
802
+ "learning_rate": 1.5787846818616294e-05,
803
+ "loss": 0.4956,
804
+ "step": 850
805
+ },
806
+ {
807
+ "epoch": 37.391304347826086,
808
+ "grad_norm": 1.8016853332519531,
809
+ "learning_rate": 1.5787845254166782e-05,
810
+ "loss": 0.4679,
811
+ "step": 860
812
+ },
813
+ {
814
+ "epoch": 37.82608695652174,
815
+ "grad_norm": 1.2095096111297607,
816
+ "learning_rate": 1.5787840560818862e-05,
817
+ "loss": 0.4155,
818
+ "step": 870
819
+ },
820
+ {
821
+ "epoch": 38.26086956521739,
822
+ "grad_norm": 1.1868607997894287,
823
+ "learning_rate": 1.5787832738574396e-05,
824
+ "loss": 0.4514,
825
+ "step": 880
826
+ },
827
+ {
828
+ "epoch": 38.69565217391305,
829
+ "grad_norm": 1.377031922340393,
830
+ "learning_rate": 1.5787821787436484e-05,
831
+ "loss": 0.4023,
832
+ "step": 890
833
+ },
834
+ {
835
+ "epoch": 39.130434782608695,
836
+ "grad_norm": 1.88169527053833,
837
+ "learning_rate": 1.578780770740947e-05,
838
+ "loss": 0.5221,
839
+ "step": 900
840
+ },
841
+ {
842
+ "epoch": 39.130434782608695,
843
+ "eval_loss": 0.7288335561752319,
844
+ "eval_runtime": 0.4014,
845
+ "eval_samples_per_second": 24.912,
846
+ "eval_steps_per_second": 24.912,
847
+ "step": 900
848
+ },
849
+ {
850
+ "epoch": 39.130434782608695,
851
+ "eval_loss": 0.8609212040901184,
852
+ "eval_runtime": 0.5203,
853
+ "eval_samples_per_second": 19.219,
854
+ "eval_steps_per_second": 19.219,
855
+ "step": 900
856
+ },
857
+ {
858
+ "epoch": 39.130434782608695,
859
+ "eval_loss": 0.7288335561752319,
860
+ "eval_runtime": 0.5688,
861
+ "eval_samples_per_second": 17.58,
862
+ "eval_steps_per_second": 17.58,
863
+ "step": 900
864
+ },
865
+ {
866
+ "epoch": 39.130434782608695,
867
+ "eval_loss": 0.7238188982009888,
868
+ "eval_runtime": 0.4689,
869
+ "eval_samples_per_second": 21.324,
870
+ "eval_steps_per_second": 21.324,
871
+ "step": 900
872
+ },
873
+ {
874
+ "epoch": 39.130434782608695,
875
+ "eval_loss": 0.861186146736145,
876
+ "eval_runtime": 0.3946,
877
+ "eval_samples_per_second": 25.341,
878
+ "eval_steps_per_second": 25.341,
879
+ "step": 900
880
  }
881
  ],
882
  "logging_steps": 10,
 
896
  "attributes": {}
897
  }
898
  },
899
+ "total_flos": 2.3078822838165504e+16,
900
  "train_batch_size": 4,
901
  "trial_name": null,
902
  "trial_params": null