ONNX-converted version of the model

We decided to replace the existing model used by the [Code Scanner](https://llm-guard.com/input_scanners/code/) in [llm-guard](https://github.com/laiyer-ai/llm-guard) with your model. Our tests show much better accuracy than the existing HuggingFace model.

For faster inference, we use ONNX models converted with HuggingFace Optimum.

Example of a repository that includes the ONNX files: https://huggingface.co/laiyer/deberta-v3-base-prompt-injection

Files changed (7) hide show

onnx/config.json +84 -0
onnx/merges.txt +0 -0
onnx/model.onnx +3 -0
onnx/special_tokens_map.json +51 -0
onnx/tokenizer.json +0 -0
onnx/tokenizer_config.json +58 -0
onnx/vocab.json +0 -0

onnx/config.json ADDED Viewed

	@@ -0,0 +1,84 @@

+{
+  "_name_or_path": "philomath-1209/programming-language-identification",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Scala",
+    "1": "JavaScript",
+    "2": "COBOL",
+    "3": "ARM Assembly",
+    "4": "R",
+    "5": "Lua",
+    "6": "C++",
+    "7": "Visual Basic .NET",
+    "8": "Go",
+    "9": "Erlang",
+    "10": "C#",
+    "11": "Rust",
+    "12": "Ruby",
+    "13": "Swift",
+    "14": "Mathematica/Wolfram Language",
+    "15": "PHP",
+    "16": "Fortran",
+    "17": "AppleScript",
+    "18": "Pascal",
+    "19": "Java",
+    "20": "PowerShell",
+    "21": "Python",
+    "22": "C",
+    "23": "Perl",
+    "24": "Kotlin",
+    "25": "jq"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "ARM Assembly": 3,
+    "AppleScript": 17,
+    "C": 22,
+    "C#": 10,
+    "C++": 6,
+    "COBOL": 2,
+    "Erlang": 9,
+    "Fortran": 16,
+    "Go": 8,
+    "Java": 19,
+    "JavaScript": 1,
+    "Kotlin": 24,
+    "Lua": 5,
+    "Mathematica/Wolfram Language": 14,
+    "PHP": 15,
+    "Pascal": 18,
+    "Perl": 23,
+    "PowerShell": 20,
+    "Python": 21,
+    "R": 4,
+    "Ruby": 12,
+    "Rust": 11,
+    "Scala": 0,
+    "Swift": 13,
+    "Visual Basic .NET": 7,
+    "jq": 25
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 52000
+}

onnx/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9bf25fe9933c20db353c90fd5295554d4c02f06f6f052d8ec1ef87c623e0bf
+size 334030262

onnx/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

onnx/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "max_len": 512,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

onnx/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff