Rakhman16 committed on
Commit
ba17f67
·
verified ·
1 Parent(s): 9fedd13

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9397f132a6123749318f2cec2de3795c2cecb21b04af496e60060bdf559d882
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:315dd3d1be5cd5aad93e16d6cfd64f1bb9fe3d28b4bf1a28890a1a22e06b4268
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c4d354f40628a2bf76efa3fc41baf5125cffd8c92ac7a1e648705f2d017dfe1
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2aded1500c0a4e0686d69b68e13b1d801287a27d997750ae1545e6654570347
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c628fd47930868a3626bfd463d6fa585c5249cd4b2ad88dfb998ebdaeffc2454
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448b3e60abf19a367f627475ea9fd93123102153c10bf14e51ba3e6e1e24bd8e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b542bbc256ffe03bc3de81e397affacfdab8368eb5fbeffaeab7a4d3289a6f9f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11165328e3ed53c81315a2b3c898e1767b47d1d7722e15ceb97f24911d09fdce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2032385915517807,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
4
- "epoch": 2.4587284861257466,
5
  "eval_steps": 100,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -777,6 +777,116 @@
777
  "eval_samples_per_second": 66.548,
778
  "eval_steps_per_second": 2.089,
779
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  }
781
  ],
782
  "logging_steps": 50,
@@ -796,7 +906,7 @@
796
  "attributes": {}
797
  }
798
  },
799
- "total_flos": 3.40970746871808e+16,
800
  "train_batch_size": 32,
801
  "trial_name": null,
802
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20224925875663757,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-4000",
4
+ "epoch": 2.8099754127151386,
5
  "eval_steps": 100,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
777
  "eval_samples_per_second": 66.548,
778
  "eval_steps_per_second": 2.089,
779
  "step": 3500
780
+ },
781
+ {
782
+ "epoch": 2.493853178784686,
783
+ "grad_norm": 23787.681640625,
784
+ "learning_rate": 1.128952916373858e-05,
785
+ "loss": 0.1984,
786
+ "step": 3550
787
+ },
788
+ {
789
+ "epoch": 2.528977871443625,
790
+ "grad_norm": 24526.529296875,
791
+ "learning_rate": 1.1026001405481377e-05,
792
+ "loss": 0.1971,
793
+ "step": 3600
794
+ },
795
+ {
796
+ "epoch": 2.528977871443625,
797
+ "eval_loss": 0.20272360742092133,
798
+ "eval_runtime": 66.8824,
799
+ "eval_samples_per_second": 66.684,
800
+ "eval_steps_per_second": 2.093,
801
+ "step": 3600
802
+ },
803
+ {
804
+ "epoch": 2.564102564102564,
805
+ "grad_norm": 23948.60546875,
806
+ "learning_rate": 1.0762473647224174e-05,
807
+ "loss": 0.1904,
808
+ "step": 3650
809
+ },
810
+ {
811
+ "epoch": 2.5992272567615036,
812
+ "grad_norm": 17924.513671875,
813
+ "learning_rate": 1.0498945888966972e-05,
814
+ "loss": 0.1968,
815
+ "step": 3700
816
+ },
817
+ {
818
+ "epoch": 2.5992272567615036,
819
+ "eval_loss": 0.20258785784244537,
820
+ "eval_runtime": 67.0213,
821
+ "eval_samples_per_second": 66.546,
822
+ "eval_steps_per_second": 2.089,
823
+ "step": 3700
824
+ },
825
+ {
826
+ "epoch": 2.6343519494204424,
827
+ "grad_norm": 18695.21875,
828
+ "learning_rate": 1.0235418130709768e-05,
829
+ "loss": 0.1961,
830
+ "step": 3750
831
+ },
832
+ {
833
+ "epoch": 2.669476642079382,
834
+ "grad_norm": 23424.083984375,
835
+ "learning_rate": 9.971890372452565e-06,
836
+ "loss": 0.1961,
837
+ "step": 3800
838
+ },
839
+ {
840
+ "epoch": 2.669476642079382,
841
+ "eval_loss": 0.2024257928133011,
842
+ "eval_runtime": 67.1877,
843
+ "eval_samples_per_second": 66.381,
844
+ "eval_steps_per_second": 2.084,
845
+ "step": 3800
846
+ },
847
+ {
848
+ "epoch": 2.704601334738321,
849
+ "grad_norm": 18417.158203125,
850
+ "learning_rate": 9.708362614195362e-06,
851
+ "loss": 0.2004,
852
+ "step": 3850
853
+ },
854
+ {
855
+ "epoch": 2.73972602739726,
856
+ "grad_norm": 29204.578125,
857
+ "learning_rate": 9.444834855938158e-06,
858
+ "loss": 0.2,
859
+ "step": 3900
860
+ },
861
+ {
862
+ "epoch": 2.73972602739726,
863
+ "eval_loss": 0.20261028409004211,
864
+ "eval_runtime": 67.145,
865
+ "eval_samples_per_second": 66.423,
866
+ "eval_steps_per_second": 2.085,
867
+ "step": 3900
868
+ },
869
+ {
870
+ "epoch": 2.7748507200561994,
871
+ "grad_norm": 22810.859375,
872
+ "learning_rate": 9.181307097680956e-06,
873
+ "loss": 0.1955,
874
+ "step": 3950
875
+ },
876
+ {
877
+ "epoch": 2.8099754127151386,
878
+ "grad_norm": 20385.189453125,
879
+ "learning_rate": 8.917779339423753e-06,
880
+ "loss": 0.1902,
881
+ "step": 4000
882
+ },
883
+ {
884
+ "epoch": 2.8099754127151386,
885
+ "eval_loss": 0.20224925875663757,
886
+ "eval_runtime": 66.8567,
887
+ "eval_samples_per_second": 66.71,
888
+ "eval_steps_per_second": 2.094,
889
+ "step": 4000
890
  }
891
  ],
892
  "logging_steps": 50,
 
906
  "attributes": {}
907
  }
908
  },
909
+ "total_flos": 3.89687378116608e+16,
910
  "train_batch_size": 32,
911
  "trial_name": null,
912
  "trial_params": null