paraphrase-MiniLM-L3-v2-onnx-o3-cpu

Browse files

Files changed (10) hide show

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/.gitattributes +36 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/README.md +51 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/config.json +25 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/model.onnx +3 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/ort_config.json +39 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/source.txt +1 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/special_tokens_map.json +37 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/tokenizer.json +0 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/tokenizer_config.json +64 -0
paraphrase-MiniLM-L3-v2-onnx-o3-cpu/vocab.txt +0 -0

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/README.md ADDED Viewed

	@@ -0,0 +1,51 @@

+---
+pipeline_tag: feature-extraction
+tags:
+  - sentence-transformers
+  - feature-extraction
+  - sentence-similarity
+language: en
+license: apache-2.0
+---
+# ONNX Conversion of [sentence-transformers/paraphrase-MiniLM-L3-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L3-v2)
+- ONNX model for CPU with O3 optimisation
+- This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for tasks like clustering or semantic search.
+## Usage
+```python
+import torch
+import torch.nn.functional as F
+from optimum.onnxruntime import ORTModelForFeatureExtraction
+from transformers import AutoTokenizer
+sentences = [
+    "The llama (/ˈlɑːmə/) (Lama glama) is a domesticated South American camelid.",
+    "The alpaca (Lama pacos) is a species of South American camelid mammal.",
+    "The vicuña (Lama vicugna) (/vɪˈkuːnjə/) is one of the two wild South American camelids.",
+]
+model_name = "EmbeddedLLM/paraphrase-MiniLM-L3-v2-onnx-o3-cpu"
+device = "cpu"
+provider = "CPUExecutionProvider"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = ORTModelForFeatureExtraction.from_pretrained(
+    model_name, use_io_binding=True, provider=provider, device_map=device
+)
+inputs = tokenizer(
+    sentences,
+    padding=True,
+    truncation=True,
+    return_tensors="pt",
+    max_length=model.config.max_position_embeddings,
+)
+inputs = inputs.to(device)
+token_embeddings = model(**inputs).last_hidden_state
+# Pool
+att_mask = inputs["attention_mask"].unsqueeze(-1).expand(token_embeddings.size()).float()
+embeddings = torch.sum(token_embeddings * att_mask, 1) / torch.clamp(att_mask.sum(1), min=1e-9)
+embeddings = F.normalize(embeddings, p=2, dim=1)
+print(embeddings.cpu().numpy().shape)
+```

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "sentence-transformers/paraphrase-MiniLM-L3-v2",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 3,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c077487e39da7a3ab432a54e2e9b0d63c42d8a385c14c0f9780292104a1883
+size 68981619

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/ort_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {
+    "disable_attention": null,
+    "disable_attention_fusion": false,
+    "disable_bias_gelu": null,
+    "disable_bias_gelu_fusion": false,
+    "disable_bias_skip_layer_norm": null,
+    "disable_bias_skip_layer_norm_fusion": false,
+    "disable_embed_layer_norm": true,
+    "disable_embed_layer_norm_fusion": true,
+    "disable_gelu": null,
+    "disable_gelu_fusion": false,
+    "disable_group_norm_fusion": true,
+    "disable_layer_norm": null,
+    "disable_layer_norm_fusion": false,
+    "disable_packed_kv": true,
+    "disable_rotary_embeddings": false,
+    "disable_shape_inference": false,
+    "disable_skip_layer_norm": null,
+    "disable_skip_layer_norm_fusion": false,
+    "enable_gelu_approximation": true,
+    "enable_gemm_fast_gelu_fusion": false,
+    "enable_transformers_specific_optimizations": true,
+    "fp16": false,
+    "no_attention_mask": false,
+    "optimization_level": 2,
+    "optimize_for_gpu": false,
+    "optimize_with_onnxruntime_only": null,
+    "use_mask_index": false,
+    "use_multi_head_attention": false,
+    "use_raw_attention_mask": false
+  },
+  "optimum_version": "1.15.0",
+  "quantization": {},
+  "transformers_version": "4.36.2",
+  "use_external_data_format": false
+}

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/source.txt ADDED Viewed

	@@ -0,0 +1 @@


+https://huggingface.co/EmbeddedLLM/paraphrase-MiniLM-L3-v2-onnx-o3-cpu

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

paraphrase-MiniLM-L3-v2-onnx-o3-cpu/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff