Upload 8 files

Browse files

Files changed (7) hide show

config.json +2 -3
onnx/model.onnx +3 -0
onnx/model_quantized.onnx +3 -0
quantize_config.json +38 -0
special_tokens_map.json +49 -7
tokenizer.json +85 -23
tokenizer_config.json +53 -10

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "klue/roberta-base",
   "architectures": [
     "RobertaForSequenceClassification"
   ],
@@ -22,8 +22,7 @@
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "tokenizer_class": "BertTokenizer",
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.2",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 32000

 {
+  "_name_or_path": "kms7530/roberta-base-infringement-detect",
   "architectures": [
     "RobertaForSequenceClassification"
   ],
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "tokenizer_class": "BertTokenizer",
+  "transformers_version": "4.42.3",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 32000

onnx/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c95f5f62d62c1eab2b90c2e37dd975466bce76d7da5f77bdc9b92931ab8584f
+size 442760521

onnx/model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85fe5d43fbc90db3a9c03e0308ef919749e36e2c87943d83886fec41d6c2f6e7
+size 111877129

quantize_config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+    "per_channel": true,
+    "reduce_range": true,
+    "per_model_config": {
+        "model": {
+            "op_types": [
+                "Gemm",
+                "Add",
+                "Transpose",
+                "ConstantOfShape",
+                "ReduceMean",
+                "Sqrt",
+                "Gather",
+                "CumSum",
+                "Slice",
+                "Equal",
+                "Shape",
+                "Mul",
+                "Cast",
+                "Concat",
+                "Not",
+                "Erf",
+                "Tanh",
+                "MatMul",
+                "Unsqueeze",
+                "Reshape",
+                "Softmax",
+                "Sub",
+                "Pow",
+                "Expand",
+                "Constant",
+                "Div",
+                "Where"
+            ],
+            "weight_type": "QInt8"
+        }
+    }
+}

special_tokens_map.json CHANGED Viewed

@@ -1,9 +1,51 @@
 {
-  "unk_token": "[UNK]",
-  "sep_token": "[SEP]",
-  "pad_token": "[PAD]",
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "bos_token": "[CLS]",
-  "eos_token": "[SEP]"
 }

 {
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

@@ -5,48 +5,48 @@
   "added_tokens": [
     {
       "id": 0,
-      "special": true,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 1,
-      "special": true,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 2,
-      "special": true,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 3,
-      "special": true,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 4,
-      "special": true,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     }
   ],
   "normalizer": {
@@ -56,27 +56,89 @@
     "strip_accents": null,
     "lowercase": false
   },
-  "pre_tokenizer": { "type": "BertPreTokenizer" },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
-      { "SpecialToken": { "id": "[CLS]", "type_id": 0 } },
-      { "Sequence": { "id": "A", "type_id": 0 } },
-      { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }
     ],
     "pair": [
-      { "SpecialToken": { "id": "[CLS]", "type_id": 0 } },
-      { "Sequence": { "id": "A", "type_id": 0 } },
-      { "SpecialToken": { "id": "[SEP]", "type_id": 0 } },
-      { "Sequence": { "id": "B", "type_id": 0 } },
-      { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }
     ],
     "special_tokens": {
-      "[CLS]": { "id": "[CLS]", "ids": [0], "tokens": ["[CLS]"] },
-      "[SEP]": { "id": "[SEP]", "ids": [2], "tokens": ["[SEP]"] }
     }
   },
-  "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true },
   "model": {
     "type": "WordPiece",
     "unk_token": "[UNK]",
@@ -32085,4 +32147,4 @@
       "[unused499]": 31999
     }
   }
-}

   "added_tokens": [
     {
       "id": 0,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 1,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 2,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 3,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 4,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": {
     "strip_accents": null,
     "lowercase": false
   },
+  "pre_tokenizer": {
+    "type": "BertPreTokenizer"
+  },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
     ],
     "pair": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
     ],
     "special_tokens": {
+      "[CLS]": {
+        "id": "[CLS]",
+        "ids": [
+          0
+        ],
+        "tokens": [
+          "[CLS]"
+        ]
+      },
+      "[SEP]": {
+        "id": "[SEP]",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "[SEP]"
+        ]
+      }
     }
   },
+  "decoder": {
+    "type": "WordPiece",
+    "prefix": "##",
+    "cleanup": true
+  },
   "model": {
     "type": "WordPiece",
     "unk_token": "[UNK]",
       "[unused499]": 31999
     }
   }
+}

tokenizer_config.json CHANGED Viewed

@@ -1,16 +1,59 @@
 {
-  "do_lower_case": false,
   "do_basic_tokenize": true,
   "never_split": null,
-  "unk_token": "[UNK]",
-  "sep_token": "[SEP]",
   "pad_token": "[PAD]",
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "bos_token": "[CLS]",
-  "eos_token": "[SEP]",
-  "tokenize_chinese_chars": true,
   "strip_accents": null,
-  "model_max_length": 512,
-  "tokenizer_class": "BertTokenizer"
 }

 {
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
   "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
   "never_split": null,
   "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
   "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }