sileod committed on
Commit
58de7c2
·
verified ·
1 Parent(s): 9d1e94f

Add new SentenceTransformer model

Browse files
Files changed (2) hide show
  1. README.md +43 -107
  2. model.safetensors +1 -1
README.md CHANGED
@@ -166,7 +166,7 @@ print(query_embeddings.shape, document_embeddings.shape)
166
  # Get the similarity scores for the embeddings
167
  similarities = model.similarity(query_embeddings, document_embeddings)
168
  print(similarities)
169
- # tensor([[ 0.6237, -0.0022, -0.1018]])
170
  ```
171
 
172
  <!--
@@ -726,13 +726,15 @@ You can finetune this model on your own dataset.
726
  ### Training Hyperparameters
727
  #### Non-Default Hyperparameters
728
 
729
- - `per_device_train_batch_size`: 256
730
- - `learning_rate`: 8e-05
731
  - `weight_decay`: 1e-06
732
- - `num_train_epochs`: 2
733
  - `warmup_ratio`: 0.1
734
  - `fp16`: True
735
  - `gradient_checkpointing`: True
 
 
736
 
737
  #### All Hyperparameters
738
  <details><summary>Click to expand</summary>
@@ -741,20 +743,20 @@ You can finetune this model on your own dataset.
741
  - `do_predict`: False
742
  - `eval_strategy`: no
743
  - `prediction_loss_only`: True
744
- - `per_device_train_batch_size`: 256
745
  - `per_device_eval_batch_size`: 8
746
  - `per_gpu_train_batch_size`: None
747
  - `per_gpu_eval_batch_size`: None
748
  - `gradient_accumulation_steps`: 1
749
  - `eval_accumulation_steps`: None
750
  - `torch_empty_cache_steps`: None
751
- - `learning_rate`: 8e-05
752
  - `weight_decay`: 1e-06
753
  - `adam_beta1`: 0.9
754
  - `adam_beta2`: 0.999
755
  - `adam_epsilon`: 1e-08
756
  - `max_grad_norm`: 1.0
757
- - `num_train_epochs`: 2
758
  - `max_steps`: -1
759
  - `lr_scheduler_type`: linear
760
  - `lr_scheduler_kwargs`: {}
@@ -838,8 +840,8 @@ You can finetune this model on your own dataset.
838
  - `torchdynamo`: None
839
  - `ray_scope`: last
840
  - `ddp_timeout`: 1800
841
- - `torch_compile`: False
842
- - `torch_compile_backend`: None
843
  - `torch_compile_mode`: None
844
  - `include_tokens_per_second`: False
845
  - `include_num_input_tokens_seen`: no
@@ -862,104 +864,38 @@ You can finetune this model on your own dataset.
862
  ### Training Logs
863
  | Epoch | Step | Training Loss |
864
  |:------:|:-----:|:-------------:|
865
- | 0.0202 | 500 | 4.5778 |
866
- | 0.0404 | 1000 | 3.5556 |
867
- | 0.0606 | 1500 | 2.5948 |
868
- | 0.0808 | 2000 | 2.3723 |
869
- | 0.1011 | 2500 | 2.1149 |
870
- | 0.1213 | 3000 | 2.3977 |
871
- | 0.1415 | 3500 | 2.3535 |
872
- | 0.1617 | 4000 | 1.9057 |
873
- | 0.1819 | 4500 | 2.1313 |
874
- | 0.2021 | 5000 | 2.1719 |
875
- | 0.2223 | 5500 | 1.887 |
876
- | 0.2425 | 6000 | 2.1792 |
877
- | 0.2627 | 6500 | 2.3001 |
878
- | 0.2830 | 7000 | 2.0002 |
879
- | 0.3032 | 7500 | 1.9358 |
880
- | 0.3234 | 8000 | 1.9074 |
881
- | 0.3436 | 8500 | 1.9204 |
882
- | 0.3638 | 9000 | 1.8991 |
883
- | 0.3840 | 9500 | 2.0086 |
884
- | 0.4042 | 10000 | 1.8229 |
885
- | 0.4244 | 10500 | 1.7437 |
886
- | 0.4446 | 11000 | 2.2012 |
887
- | 0.4649 | 11500 | 1.6898 |
888
- | 0.4851 | 12000 | 2.1212 |
889
- | 0.5053 | 12500 | 1.8014 |
890
- | 0.5255 | 13000 | 2.1112 |
891
- | 0.5457 | 13500 | 1.885 |
892
- | 0.5659 | 14000 | 1.6889 |
893
- | 0.5861 | 14500 | 1.6377 |
894
- | 0.6063 | 15000 | 1.8526 |
895
- | 0.6265 | 15500 | 1.8912 |
896
- | 0.6468 | 16000 | 1.8621 |
897
- | 0.6670 | 16500 | 1.743 |
898
- | 0.6872 | 17000 | 1.5893 |
899
- | 0.7074 | 17500 | 1.9079 |
900
- | 0.7276 | 18000 | 1.5885 |
901
- | 0.7478 | 18500 | 1.9128 |
902
- | 0.7680 | 19000 | 1.6654 |
903
- | 0.7882 | 19500 | 1.7099 |
904
- | 0.8084 | 20000 | 1.4688 |
905
- | 0.8287 | 20500 | 1.3844 |
906
- | 0.8489 | 21000 | 1.7908 |
907
- | 0.8691 | 21500 | 1.7075 |
908
- | 0.8893 | 22000 | 1.8114 |
909
- | 0.9095 | 22500 | 1.5198 |
910
- | 0.9297 | 23000 | 1.8605 |
911
- | 0.9499 | 23500 | 1.6604 |
912
- | 0.9701 | 24000 | 1.5891 |
913
- | 0.9903 | 24500 | 1.5906 |
914
- | 1.0106 | 25000 | 1.5027 |
915
- | 1.0308 | 25500 | 1.7599 |
916
- | 1.0510 | 26000 | 1.4124 |
917
- | 1.0712 | 26500 | 1.5636 |
918
- | 1.0914 | 27000 | 1.6126 |
919
- | 1.1116 | 27500 | 1.4625 |
920
- | 1.1318 | 28000 | 1.4467 |
921
- | 1.1520 | 28500 | 1.6898 |
922
- | 1.1722 | 29000 | 1.5088 |
923
- | 1.1924 | 29500 | 1.5158 |
924
- | 1.2127 | 30000 | 1.5266 |
925
- | 1.2329 | 30500 | 1.465 |
926
- | 1.2531 | 31000 | 1.5687 |
927
- | 1.2733 | 31500 | 1.4397 |
928
- | 1.2935 | 32000 | 1.7929 |
929
- | 1.3137 | 32500 | 1.5893 |
930
- | 1.3339 | 33000 | 1.4727 |
931
- | 1.3541 | 33500 | 1.6007 |
932
- | 1.3743 | 34000 | 1.2833 |
933
- | 1.3946 | 34500 | 1.5541 |
934
- | 1.4148 | 35000 | 1.3354 |
935
- | 1.4350 | 35500 | 1.4509 |
936
- | 1.4552 | 36000 | 1.6065 |
937
- | 1.4754 | 36500 | 1.6393 |
938
- | 1.4956 | 37000 | 1.3914 |
939
- | 1.5158 | 37500 | 1.3584 |
940
- | 1.5360 | 38000 | 1.5504 |
941
- | 1.5562 | 38500 | 1.2169 |
942
- | 1.5765 | 39000 | 1.4081 |
943
- | 1.5967 | 39500 | 1.5506 |
944
- | 1.6169 | 40000 | 1.473 |
945
- | 1.6371 | 40500 | 1.2517 |
946
- | 1.6573 | 41000 | 1.7644 |
947
- | 1.6775 | 41500 | 1.4237 |
948
- | 1.6977 | 42000 | 1.295 |
949
- | 1.7179 | 42500 | 1.4951 |
950
- | 1.7381 | 43000 | 1.4389 |
951
- | 1.7584 | 43500 | 1.5742 |
952
- | 1.7786 | 44000 | 1.4843 |
953
- | 1.7988 | 44500 | 1.4806 |
954
- | 1.8190 | 45000 | 1.3674 |
955
- | 1.8392 | 45500 | 1.329 |
956
- | 1.8594 | 46000 | 1.7644 |
957
- | 1.8796 | 46500 | 1.36 |
958
- | 1.8998 | 47000 | 1.2003 |
959
- | 1.9200 | 47500 | 1.233 |
960
- | 1.9403 | 48000 | 1.5147 |
961
- | 1.9605 | 48500 | 1.3838 |
962
- | 1.9807 | 49000 | 1.4928 |
963
 
964
 
965
  ### Framework Versions
 
166
  # Get the similarity scores for the embeddings
167
  similarities = model.similarity(query_embeddings, document_embeddings)
168
  print(similarities)
169
+ # tensor([[ 0.5738, 0.0240, -0.0787]])
170
  ```
171
 
172
  <!--
 
726
  ### Training Hyperparameters
727
  #### Non-Default Hyperparameters
728
 
729
+ - `per_device_train_batch_size`: 384
730
+ - `learning_rate`: 0.0001
731
  - `weight_decay`: 1e-06
732
+ - `num_train_epochs`: 1
733
  - `warmup_ratio`: 0.1
734
  - `fp16`: True
735
  - `gradient_checkpointing`: True
736
+ - `torch_compile`: True
737
+ - `torch_compile_backend`: inductor
738
 
739
  #### All Hyperparameters
740
  <details><summary>Click to expand</summary>
 
743
  - `do_predict`: False
744
  - `eval_strategy`: no
745
  - `prediction_loss_only`: True
746
+ - `per_device_train_batch_size`: 384
747
  - `per_device_eval_batch_size`: 8
748
  - `per_gpu_train_batch_size`: None
749
  - `per_gpu_eval_batch_size`: None
750
  - `gradient_accumulation_steps`: 1
751
  - `eval_accumulation_steps`: None
752
  - `torch_empty_cache_steps`: None
753
+ - `learning_rate`: 0.0001
754
  - `weight_decay`: 1e-06
755
  - `adam_beta1`: 0.9
756
  - `adam_beta2`: 0.999
757
  - `adam_epsilon`: 1e-08
758
  - `max_grad_norm`: 1.0
759
+ - `num_train_epochs`: 1
760
  - `max_steps`: -1
761
  - `lr_scheduler_type`: linear
762
  - `lr_scheduler_kwargs`: {}
 
840
  - `torchdynamo`: None
841
  - `ray_scope`: last
842
  - `ddp_timeout`: 1800
843
+ - `torch_compile`: True
844
+ - `torch_compile_backend`: inductor
845
  - `torch_compile_mode`: None
846
  - `include_tokens_per_second`: False
847
  - `include_num_input_tokens_seen`: no
 
864
  ### Training Logs
865
  | Epoch | Step | Training Loss |
866
  |:------:|:-----:|:-------------:|
867
+ | 0.0303 | 500 | 4.8473 |
868
+ | 0.0606 | 1000 | 2.6754 |
869
+ | 0.0909 | 1500 | 2.6358 |
870
+ | 0.1212 | 2000 | 2.619 |
871
+ | 0.1515 | 2500 | 2.8342 |
872
+ | 0.1818 | 3000 | 2.2872 |
873
+ | 0.2121 | 3500 | 2.2727 |
874
+ | 0.2424 | 4000 | 2.3469 |
875
+ | 0.2727 | 4500 | 2.1085 |
876
+ | 0.3030 | 5000 | 2.2076 |
877
+ | 0.3334 | 5500 | 2.1161 |
878
+ | 0.3637 | 6000 | 2.2332 |
879
+ | 0.3940 | 6500 | 2.1574 |
880
+ | 0.4243 | 7000 | 2.1012 |
881
+ | 0.4546 | 7500 | 1.946 |
882
+ | 0.4849 | 8000 | 1.7233 |
883
+ | 0.5152 | 8500 | 2.4444 |
884
+ | 0.5455 | 9000 | 2.1055 |
885
+ | 0.5758 | 9500 | 1.9107 |
886
+ | 0.6061 | 10000 | 2.0212 |
887
+ | 0.6364 | 10500 | 2.1029 |
888
+ | 0.6667 | 11000 | 1.8484 |
889
+ | 0.6970 | 11500 | 2.1658 |
890
+ | 0.7273 | 12000 | 2.1007 |
891
+ | 0.7576 | 12500 | 1.9194 |
892
+ | 0.7879 | 13000 | 1.6709 |
893
+ | 0.8182 | 13500 | 1.7653 |
894
+ | 0.8485 | 14000 | 1.952 |
895
+ | 0.8788 | 14500 | 1.8437 |
896
+ | 0.9091 | 15000 | 1.6667 |
897
+ | 0.9395 | 15500 | 1.7433 |
898
+ | 0.9698 | 16000 | 1.7623 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899
 
900
 
901
  ### Framework Versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:895b49d6283aa8bc1a1bcf30e93046f410c8c32d946f0ee02e688c55f602024c
3
  size 127538496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7658290cf36da3d18ee7ebfc328f9c40bd49d23c22c9bf0cd9cb101c1c526c40
3
  size 127538496