ILT37 committed on
Commit
9ac98af
·
verified ·
1 Parent(s): a14a2b2

Upload 8 files

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +5 -158
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c28dcaa2d2be8d4704cab165c677f25b4270b624587e7633ac36c7a5f8e892a8
3
  size 343679232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de097412cad8746b6eb19ea1dc0e2199ef3d2a9a8accae460a9a51bf4e2f8331
3
  size 343679232
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc1d73444a304e94e5a3a83c6813d1158e4e90f52fc0bf7358b8a2e88e8b8664
3
  size 687473786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f862b26aa8c421663d1c84cd6f1f01648f91e1bd1fbe4fca01c7179e3e57342
3
  size 687473786
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:871a2b0b3d0be54fee74343292af9b7281a3f77417a97c8b9e2b20f80e215aa5
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec859b2ce68dc22239ae89fc89ca51be37f1eb1e4eec3225a7e5f38fec447b57
3
  size 13990
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c855437d80306b6e4683c73fbc3bbe2978ddcaaa00039979af9c1a9e50f7f25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56247b3349547ea82644afccfe004506052b457e363254cda8547006aa0a9fb8
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9240393208221627,
3
- "best_model_checkpoint": "pokemon_models\\checkpoint-1750",
4
- "epoch": 28.0,
5
  "eval_steps": 500,
6
- "global_step": 1960,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -694,159 +694,6 @@
694
  "eval_samples_per_second": 10.376,
695
  "eval_steps_per_second": 0.649,
696
  "step": 1610
697
- },
698
- {
699
- "epoch": 23.14,
700
- "learning_rate": 1.0555555555555555e-05,
701
- "loss": 0.5869,
702
- "step": 1620
703
- },
704
- {
705
- "epoch": 23.43,
706
- "learning_rate": 1e-05,
707
- "loss": 0.5809,
708
- "step": 1640
709
- },
710
- {
711
- "epoch": 23.71,
712
- "learning_rate": 9.444444444444445e-06,
713
- "loss": 0.5972,
714
- "step": 1660
715
- },
716
- {
717
- "epoch": 24.0,
718
- "learning_rate": 8.88888888888889e-06,
719
- "loss": 0.5726,
720
- "step": 1680
721
- },
722
- {
723
- "epoch": 24.0,
724
- "eval_accuracy": 0.9186773905272565,
725
- "eval_loss": 0.8351905345916748,
726
- "eval_runtime": 109.1081,
727
- "eval_samples_per_second": 10.256,
728
- "eval_steps_per_second": 0.642,
729
- "step": 1680
730
- },
731
- {
732
- "epoch": 24.29,
733
- "learning_rate": 8.333333333333334e-06,
734
- "loss": 0.5562,
735
- "step": 1700
736
- },
737
- {
738
- "epoch": 24.57,
739
- "learning_rate": 7.777777777777777e-06,
740
- "loss": 0.5687,
741
- "step": 1720
742
- },
743
- {
744
- "epoch": 24.86,
745
- "learning_rate": 7.222222222222222e-06,
746
- "loss": 0.5418,
747
- "step": 1740
748
- },
749
- {
750
- "epoch": 25.0,
751
- "eval_accuracy": 0.9240393208221627,
752
- "eval_loss": 0.8053392767906189,
753
- "eval_runtime": 111.5634,
754
- "eval_samples_per_second": 10.03,
755
- "eval_steps_per_second": 0.627,
756
- "step": 1750
757
- },
758
- {
759
- "epoch": 25.14,
760
- "learning_rate": 6.666666666666667e-06,
761
- "loss": 0.5496,
762
- "step": 1760
763
- },
764
- {
765
- "epoch": 25.43,
766
- "learning_rate": 6.111111111111111e-06,
767
- "loss": 0.5354,
768
- "step": 1780
769
- },
770
- {
771
- "epoch": 25.71,
772
- "learning_rate": 5.555555555555556e-06,
773
- "loss": 0.5564,
774
- "step": 1800
775
- },
776
- {
777
- "epoch": 26.0,
778
- "learning_rate": 5e-06,
779
- "loss": 0.579,
780
- "step": 1820
781
- },
782
- {
783
- "epoch": 26.0,
784
- "eval_accuracy": 0.9151027703306523,
785
- "eval_loss": 0.8136078119277954,
786
- "eval_runtime": 105.9237,
787
- "eval_samples_per_second": 10.564,
788
- "eval_steps_per_second": 0.661,
789
- "step": 1820
790
- },
791
- {
792
- "epoch": 26.29,
793
- "learning_rate": 4.444444444444445e-06,
794
- "loss": 0.5322,
795
- "step": 1840
796
- },
797
- {
798
- "epoch": 26.57,
799
- "learning_rate": 3.888888888888889e-06,
800
- "loss": 0.5529,
801
- "step": 1860
802
- },
803
- {
804
- "epoch": 26.86,
805
- "learning_rate": 3.3333333333333333e-06,
806
- "loss": 0.5077,
807
- "step": 1880
808
- },
809
- {
810
- "epoch": 27.0,
811
- "eval_accuracy": 0.9168900804289544,
812
- "eval_loss": 0.7921976447105408,
813
- "eval_runtime": 105.3647,
814
- "eval_samples_per_second": 10.62,
815
- "eval_steps_per_second": 0.664,
816
- "step": 1890
817
- },
818
- {
819
- "epoch": 27.14,
820
- "learning_rate": 2.777777777777778e-06,
821
- "loss": 0.5443,
822
- "step": 1900
823
- },
824
- {
825
- "epoch": 27.43,
826
- "learning_rate": 2.2222222222222225e-06,
827
- "loss": 0.5181,
828
- "step": 1920
829
- },
830
- {
831
- "epoch": 27.71,
832
- "learning_rate": 1.6666666666666667e-06,
833
- "loss": 0.5318,
834
- "step": 1940
835
- },
836
- {
837
- "epoch": 28.0,
838
- "learning_rate": 1.1111111111111112e-06,
839
- "loss": 0.5138,
840
- "step": 1960
841
- },
842
- {
843
- "epoch": 28.0,
844
- "eval_accuracy": 0.9133154602323503,
845
- "eval_loss": 0.7894989252090454,
846
- "eval_runtime": 107.1689,
847
- "eval_samples_per_second": 10.441,
848
- "eval_steps_per_second": 0.653,
849
- "step": 1960
850
  }
851
  ],
852
  "logging_steps": 20,
@@ -854,7 +701,7 @@
854
  "num_input_tokens_seen": 0,
855
  "num_train_epochs": 29,
856
  "save_steps": 500,
857
- "total_flos": 9.718280747932926e+18,
858
  "train_batch_size": 16,
859
  "trial_name": null,
860
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9142091152815014,
3
+ "best_model_checkpoint": "pokemon_models\\checkpoint-1610",
4
+ "epoch": 23.0,
5
  "eval_steps": 500,
6
+ "global_step": 1610,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
694
  "eval_samples_per_second": 10.376,
695
  "eval_steps_per_second": 0.649,
696
  "step": 1610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  }
698
  ],
699
  "logging_steps": 20,
 
701
  "num_input_tokens_seen": 0,
702
  "num_train_epochs": 29,
703
  "save_steps": 500,
704
+ "total_flos": 7.982873471516332e+18,
705
  "train_batch_size": 16,
706
  "trial_name": null,
707
  "trial_params": null