besimray commited on
Commit
7b18443
·
verified ·
1 Parent(s): 5d1760f

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:818a070534240e91d68e2f203e19637a55fb6d281983e6b1f4db372769f90baf
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36bb4966b3713f17079f0f0073225f3c17789e78598436f125bc5847c546220
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:632626d38f137f1b532904c86748923f618de2d84d09066f3d0aab67269c0719
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31631c7141c9eed8d3d67722b7f007bb55e7b4644efb82e4b7c07b72a46d6b5f
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c212ac5b1e7f34fdb83be045ed0267f2d345126b22dfd1da63ed82ec4ee137d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330e765b24011cd6e18b8db74d77f7195e5780a184071a5df72e72c642350c23
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
- "epoch": 2.1052631578947367,
5
  "eval_steps": 20,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -755,6 +755,154 @@
755
  "eval_samples_per_second": 48.569,
756
  "eval_steps_per_second": 4.857,
757
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  }
759
  ],
760
  "logging_steps": 1,
@@ -769,7 +917,7 @@
769
  "early_stopping_threshold": 0.0
770
  },
771
  "attributes": {
772
- "early_stopping_patience_counter": 2
773
  }
774
  },
775
  "TrainerControl": {
@@ -778,12 +926,12 @@
778
  "should_evaluate": false,
779
  "should_log": false,
780
  "should_save": true,
781
- "should_training_stop": false
782
  },
783
  "attributes": {}
784
  }
785
  },
786
- "total_flos": 1.01497464815616e+16,
787
  "train_batch_size": 10,
788
  "trial_name": null,
789
  "trial_params": null
 
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
+ "epoch": 2.526315789473684,
5
  "eval_steps": 20,
6
+ "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
755
  "eval_samples_per_second": 48.569,
756
  "eval_steps_per_second": 4.857,
757
  "step": 100
758
+ },
759
+ {
760
+ "epoch": 2.126315789473684,
761
+ "grad_norm": 0.47087791562080383,
762
+ "learning_rate": 5.4600950026045326e-05,
763
+ "loss": 0.994,
764
+ "step": 101
765
+ },
766
+ {
767
+ "epoch": 2.1473684210526316,
768
+ "grad_norm": 0.46321335434913635,
769
+ "learning_rate": 5.261313375270014e-05,
770
+ "loss": 0.8965,
771
+ "step": 102
772
+ },
773
+ {
774
+ "epoch": 2.168421052631579,
775
+ "grad_norm": 0.48722636699676514,
776
+ "learning_rate": 5.0649178193565314e-05,
777
+ "loss": 1.0028,
778
+ "step": 103
779
+ },
780
+ {
781
+ "epoch": 2.1894736842105265,
782
+ "grad_norm": 0.5477016568183899,
783
+ "learning_rate": 4.87100722594094e-05,
784
+ "loss": 0.9755,
785
+ "step": 104
786
+ },
787
+ {
788
+ "epoch": 2.2105263157894735,
789
+ "grad_norm": 0.43870726227760315,
790
+ "learning_rate": 4.6796792348466356e-05,
791
+ "loss": 0.9023,
792
+ "step": 105
793
+ },
794
+ {
795
+ "epoch": 2.231578947368421,
796
+ "grad_norm": 0.4974609911441803,
797
+ "learning_rate": 4.491030185478976e-05,
798
+ "loss": 1.0978,
799
+ "step": 106
800
+ },
801
+ {
802
+ "epoch": 2.2526315789473683,
803
+ "grad_norm": 0.48663774132728577,
804
+ "learning_rate": 4.305155068315481e-05,
805
+ "loss": 1.1326,
806
+ "step": 107
807
+ },
808
+ {
809
+ "epoch": 2.2736842105263158,
810
+ "grad_norm": 0.47879499197006226,
811
+ "learning_rate": 4.12214747707527e-05,
812
+ "loss": 0.8403,
813
+ "step": 108
814
+ },
815
+ {
816
+ "epoch": 2.294736842105263,
817
+ "grad_norm": 0.4391883909702301,
818
+ "learning_rate": 3.942099561591802e-05,
819
+ "loss": 1.0096,
820
+ "step": 109
821
+ },
822
+ {
823
+ "epoch": 2.3157894736842106,
824
+ "grad_norm": 0.5225970149040222,
825
+ "learning_rate": 3.7651019814126654e-05,
826
+ "loss": 0.9684,
827
+ "step": 110
828
+ },
829
+ {
830
+ "epoch": 2.336842105263158,
831
+ "grad_norm": 0.529344379901886,
832
+ "learning_rate": 3.591243860149759e-05,
833
+ "loss": 0.9164,
834
+ "step": 111
835
+ },
836
+ {
837
+ "epoch": 2.3578947368421055,
838
+ "grad_norm": 0.4865782856941223,
839
+ "learning_rate": 3.4206127406028745e-05,
840
+ "loss": 1.0993,
841
+ "step": 112
842
+ },
843
+ {
844
+ "epoch": 2.3789473684210525,
845
+ "grad_norm": 0.4908663332462311,
846
+ "learning_rate": 3.253294540679257e-05,
847
+ "loss": 1.1203,
848
+ "step": 113
849
+ },
850
+ {
851
+ "epoch": 2.4,
852
+ "grad_norm": 0.4688137471675873,
853
+ "learning_rate": 3.089373510131354e-05,
854
+ "loss": 0.8358,
855
+ "step": 114
856
+ },
857
+ {
858
+ "epoch": 2.4210526315789473,
859
+ "grad_norm": 0.5007145404815674,
860
+ "learning_rate": 2.9289321881345254e-05,
861
+ "loss": 1.0975,
862
+ "step": 115
863
+ },
864
+ {
865
+ "epoch": 2.442105263157895,
866
+ "grad_norm": 0.4280741214752197,
867
+ "learning_rate": 2.7720513617260856e-05,
868
+ "loss": 1.0134,
869
+ "step": 116
870
+ },
871
+ {
872
+ "epoch": 2.463157894736842,
873
+ "grad_norm": 0.5474169850349426,
874
+ "learning_rate": 2.6188100251265945e-05,
875
+ "loss": 0.9781,
876
+ "step": 117
877
+ },
878
+ {
879
+ "epoch": 2.4842105263157896,
880
+ "grad_norm": 0.4554167091846466,
881
+ "learning_rate": 2.4692853399638917e-05,
882
+ "loss": 1.082,
883
+ "step": 118
884
+ },
885
+ {
886
+ "epoch": 2.5052631578947366,
887
+ "grad_norm": 0.5812304615974426,
888
+ "learning_rate": 2.323552596419889e-05,
889
+ "loss": 0.9826,
890
+ "step": 119
891
+ },
892
+ {
893
+ "epoch": 2.526315789473684,
894
+ "grad_norm": 0.4756172001361847,
895
+ "learning_rate": 2.181685175319702e-05,
896
+ "loss": 1.1045,
897
+ "step": 120
898
+ },
899
+ {
900
+ "epoch": 2.526315789473684,
901
+ "eval_loss": 1.1679396629333496,
902
+ "eval_runtime": 2.0595,
903
+ "eval_samples_per_second": 48.555,
904
+ "eval_steps_per_second": 4.856,
905
+ "step": 120
906
  }
907
  ],
908
  "logging_steps": 1,
 
917
  "early_stopping_threshold": 0.0
918
  },
919
  "attributes": {
920
+ "early_stopping_patience_counter": 3
921
  }
922
  },
923
  "TrainerControl": {
 
926
  "should_evaluate": false,
927
  "should_log": false,
928
  "should_save": true,
929
+ "should_training_stop": true
930
  },
931
  "attributes": {}
932
  }
933
  },
934
+ "total_flos": 1.214189411500032e+16,
935
  "train_batch_size": 10,
936
  "trial_name": null,
937
  "trial_params": null