Add new SentenceTransformer model with an ONNX backend

#5
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "USER-bge-m3",
3
  "architectures": [
4
  "XLMRobertaModel"
5
  ],
@@ -21,7 +21,7 @@
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
- "transformers_version": "4.41.2",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 46166
 
1
  {
2
+ "_name_or_path": "deepvk/USER-bge-m3",
3
  "architectures": [
4
  "XLMRobertaModel"
5
  ],
 
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
+ "transformers_version": "4.48.3",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 46166
config_sentence_transformers.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "__version__": {
3
- "sentence_transformers": "3.0.1",
4
- "transformers": "4.41.2",
5
- "pytorch": "2.1.0"
6
  },
7
  "prompts": {},
8
  "default_prompt_name": null,
9
- "similarity_fn_name": null
10
  }
 
1
  {
2
  "__version__": {
3
+ "sentence_transformers": "4.0.2",
4
+ "transformers": "4.48.3",
5
+ "pytorch": "2.5.1+cu124"
6
  },
7
  "prompts": {},
8
  "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
  }
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbcdcd4a487d286e7b271ea41dad515d3f4e9b62eba33e84056646ec41cc2f18
3
+ size 1432521616
tokenizer.json CHANGED
@@ -85,8 +85,8 @@
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
- "add_prefix_space": true,
89
- "prepend_scheme": "always"
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
@@ -172,8 +172,8 @@
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
- "add_prefix_space": true,
176
- "prepend_scheme": "always"
177
  },
178
  "model": {
179
  "type": "Unigram",
@@ -184846,4 +184846,4 @@
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
- }
 
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
+ "prepend_scheme": "always",
89
+ "split": true
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
 
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
+ "prepend_scheme": "always",
176
+ "split": true
177
  },
178
  "model": {
179
  "type": "Unigram",
 
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
+ }
tokenizer_config.json CHANGED
@@ -45,6 +45,7 @@
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
 
48
  "mask_token": "<mask>",
49
  "max_length": 512,
50
  "model_max_length": 8192,
 
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
  "max_length": 512,
51
  "model_max_length": 8192,