hieupth committed on
Commit
fcba339
·
unverified ·
1 Parent(s): 3552118

chore: add onnx model

Browse files
viencoder.model/1/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d970ed83066ae552f87d1efc6560c1cf0715b3978ab38b913f752b2cbe2378
3
+ size 537943958
viencoder.model/config.pbtxt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "viencoder.model"
2
+ platform: "onnxruntime_onnx"
3
+ backend: "onnxruntime"
4
+ default_model_filename: "model.onnx"
5
+ max_batch_size: 0
6
+ input [
7
+ {
8
+ name: "input_ids"
9
+ data_type: TYPE_INT64
10
+ dims: [ -1, -1 ]
11
+ },
12
+ {
13
+ name: "attention_mask"
14
+ data_type: TYPE_INT64
15
+ dims: [ -1, -1 ]
16
+ }
17
+ ]
18
+ output [
19
+ {
20
+ name: "sentence_embedding"
21
+ data_type: TYPE_FP32
22
+ dims: [ -1, 768 ]
23
+ }
24
+ ]
viencoder.tokenizer/1/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<mask>": 64000
3
+ }
viencoder.tokenizer/1/bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
viencoder.tokenizer/1/model.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from typing import Dict, List
4
+
5
+ import numpy as np
6
+ import triton_python_backend_utils as pb_utils
7
+ from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType
8
+
9
class TritonPythonModel:
    """Triton Python-backend model wrapping a Hugging Face tokenizer.

    Receives raw text on the "TEXT" input, tokenizes it, and returns the
    tokenizer's tensors (e.g. ``input_ids`` / ``attention_mask``) so a
    downstream ONNX Runtime model in the ensemble can consume them.
    """

    tokenizer: PreTrainedTokenizer

    def initialize(self, args: Dict[str, str]) -> None:
        """Load the tokenizer shipped alongside this model version.

        :param args: arguments from the Triton config file; more variables in
            https://github.com/triton-inference-server/python_backend/blob/main/src/python.cc
        """
        # Tokenizer files live in <model_repository>/<model_version>/.
        version_dir: str = os.path.join(
            args["model_repository"], args["model_version"]
        )
        self.tokenizer = AutoTokenizer.from_pretrained(version_dir)

    def execute(self, requests) -> "List[List[pb_utils.Tensor]]":
        """Parse and tokenize each request.

        :param requests: one or more requests received by the Triton server.
        :return: tokenized text as output tensors, one response per request.
        """
        responses = []
        # Batched requests are disabled in our case, but Triton may still
        # hand us several requests at once — handle each independently.
        for request in requests:
            raw = pb_utils.get_input_tensor_by_name(request, "TEXT").as_numpy()
            # Incoming TYPE_STRING tensors arrive as raw bytes; decode to str.
            texts = [item.decode("UTF-8") for item in raw.tolist()]

            encoded: Dict[str, np.ndarray] = self.tokenizer(
                texts,
                padding=True,
                truncation=True,
                return_tensors=TensorType.NUMPY,
            )
            # TensorRT expects int32 inputs, ONNX Runtime expects int64;
            # this ensemble feeds ORT, so cast everything to int64.
            encoded = {name: arr.astype(np.int64) for name, arr in encoded.items()}

            # Emit one tensor per tokenizer-declared model input, in order.
            output_tensors = [
                pb_utils.Tensor(name, encoded[name])
                for name in self.tokenizer.model_input_names
            ]
            responses.append(
                pb_utils.InferenceResponse(output_tensors=output_tensors)
            )

        return responses
viencoder.tokenizer/1/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
viencoder.tokenizer/1/tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "64000": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "PhobertTokenizer",
53
+ "unk_token": "<unk>"
54
+ }
viencoder.tokenizer/1/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
viencoder.tokenizer/config.pbtxt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "viencoder.tokenizer"
2
+ max_batch_size: 0
3
+ backend: "python"
4
+
5
+ input [
6
+ {
7
+ name: "TEXT"
8
+ data_type: TYPE_STRING
9
+ dims: [ -1 ]
10
+ }
11
+ ]
12
+
13
+ output [
14
+ {
15
+ name: "input_ids"
16
+ data_type: TYPE_INT64
17
+ dims: [-1, -1]
18
+ },
19
+ {
20
+ name: "attention_mask"
21
+ data_type: TYPE_INT64
22
+ dims: [-1, -1]
23
+ }
24
+ ]
viencoder/1/.gitignore ADDED
File without changes
viencoder/config.pbtxt ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "viencoder"
2
+ # maximum batch size
3
+ max_batch_size: 0
4
+ platform: "ensemble"
5
+
6
+ #input to the model
7
+ input [
8
+ {
9
+ name: "TEXT"
10
+ data_type: TYPE_STRING
11
+ dims: [ -1 ]
12
+ # -1 means dynamic axis, aka this dimension may change
13
+ }
14
+ ]
15
+
16
+ #output of the model
17
+ output {
18
+ name: "output_0"
19
+ data_type: TYPE_FP32
20
+ dims: [-1, -1]
21
+ # two dimensional tensor, where 1st dimension: batch-size, 2nd dimension: #classes
22
+ }
23
+
24
+ #Type of scheduler to be used
25
+ ensemble_scheduling {
26
+ step [
27
+ {
28
+ model_name: "viencoder.tokenizer"
29
+ model_version: -1
30
+ input_map {
31
+ key: "TEXT"
32
+ value: "TEXT"
33
+ }
34
+ output_map [
35
+ {
36
+ key: "input_ids"
37
+ value: "input_ids"
38
+ },
39
+ {
40
+ key: "attention_mask"
41
+ value: "attention_mask"
42
+ }
43
+ ]
44
+ },
45
+ {
46
+ model_name: "viencoder.model"
47
+ model_version: -1
48
+ input_map [
49
+ {
50
+ key: "input_ids"
51
+ value: "input_ids"
52
+ },
53
+ {
54
+ key: "attention_mask"
55
+ value: "attention_mask"
56
+ }
57
+ ]
58
+ output_map {
59
+ key: "sentence_embedding"
60
+ value: "output_0"
61
+ }
62
+ }
63
+ ]
64
+ }