Upload model_repo for zipformer offline
Browse files- model_repo_offline/decoder/1/.gitkeep +0 -0
- model_repo_offline/decoder/1/decoder.onnx +3 -0
- model_repo_offline/decoder/config.pbtxt +44 -0
- model_repo_offline/encoder/1/.gitkeep +0 -0
- model_repo_offline/encoder/1/encoder.onnx +3 -0
- model_repo_offline/encoder/config.pbtxt +56 -0
- model_repo_offline/feature_extractor/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repo_offline/feature_extractor/1/model.py +155 -0
- model_repo_offline/feature_extractor/config.pbtxt +72 -0
- model_repo_offline/joiner/1/.gitkeep +0 -0
- model_repo_offline/joiner/1/joiner.onnx +3 -0
- model_repo_offline/joiner/config.pbtxt +49 -0
- model_repo_offline/joiner_decoder_proj/1/joiner_decoder_proj.onnx +3 -0
- model_repo_offline/joiner_decoder_proj/config.pbtxt +43 -0
- model_repo_offline/joiner_encoder_proj/1/joiner_encoder_proj.onnx +3 -0
- model_repo_offline/joiner_encoder_proj/config.pbtxt +43 -0
- model_repo_offline/scorer/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repo_offline/scorer/1/__pycache__/search.cpython-38.pyc +0 -0
- model_repo_offline/scorer/1/model.py +181 -0
- model_repo_offline/scorer/1/search.py +133 -0
- model_repo_offline/scorer/config.pbtxt +68 -0
- model_repo_offline/transducer/1/.gitkeep +0 -0
- model_repo_offline/transducer/config.pbtxt +99 -0
model_repo_offline/decoder/1/.gitkeep
ADDED
|
File without changes
|
model_repo_offline/decoder/1/decoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d66e66ba71bc62f558aa2b246fb1c7d5f2e7b6b423e2c5ada327c3a2d40fd7ee
|
| 3 |
+
size 1041576
|
model_repo_offline/decoder/config.pbtxt
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "decoder"
|
| 16 |
+
backend: "onnxruntime"
|
| 17 |
+
default_model_filename: "decoder.onnx"
|
| 18 |
+
|
| 19 |
+
max_batch_size: 512
|
| 20 |
+
input [
|
| 21 |
+
{
|
| 22 |
+
name: "y"
|
| 23 |
+
data_type: TYPE_INT64
|
| 24 |
+
dims: [2]
|
| 25 |
+
}
|
| 26 |
+
]
|
| 27 |
+
|
| 28 |
+
output [
|
| 29 |
+
{
|
| 30 |
+
name: "decoder_out"
|
| 31 |
+
data_type: TYPE_FP32
|
| 32 |
+
dims: [ -1, -1 ]
|
| 33 |
+
}
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
dynamic_batching {
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
instance_group [
|
| 40 |
+
{
|
| 41 |
+
count: 1
|
| 42 |
+
kind: KIND_GPU
|
| 43 |
+
}
|
| 44 |
+
]
|
model_repo_offline/encoder/1/.gitkeep
ADDED
|
File without changes
|
model_repo_offline/encoder/1/encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f586e039864febaf6a65a5bb1fbf9e1ade230fafa3b82fc9c53e03d7dc06c40
|
| 3 |
+
size 353036839
|
model_repo_offline/encoder/config.pbtxt
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "encoder"
|
| 16 |
+
backend: "onnxruntime"
|
| 17 |
+
default_model_filename: "encoder.onnx"
|
| 18 |
+
|
| 19 |
+
max_batch_size: 512
|
| 20 |
+
input [
|
| 21 |
+
{
|
| 22 |
+
name: "x"
|
| 23 |
+
data_type: TYPE_FP32
|
| 24 |
+
dims: [-1, 80]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
name: "x_lens"
|
| 28 |
+
data_type: TYPE_INT64
|
| 29 |
+
dims: [1]
|
| 30 |
+
reshape: { shape: [ ] }
|
| 31 |
+
}
|
| 32 |
+
]
|
| 33 |
+
output [
|
| 34 |
+
{
|
| 35 |
+
name: "encoder_out"
|
| 36 |
+
data_type: TYPE_FP32
|
| 37 |
+
dims: [-1, -1 ]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
name: "encoder_out_lens"
|
| 41 |
+
data_type: TYPE_INT64
|
| 42 |
+
dims: [1]
|
| 43 |
+
reshape: { shape: [ ] }
|
| 44 |
+
}
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
dynamic_batching {
|
| 48 |
+
preferred_batch_size: [ 16, 32 ]
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
instance_group [
|
| 52 |
+
{
|
| 53 |
+
count: 1
|
| 54 |
+
kind: KIND_GPU
|
| 55 |
+
}
|
| 56 |
+
]
|
model_repo_offline/feature_extractor/1/__pycache__/model.cpython-38.pyc
ADDED
|
Binary file (4.77 kB). View file
|
|
|
model_repo_offline/feature_extractor/1/model.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import triton_python_backend_utils as pb_utils
|
| 16 |
+
from torch.utils.dlpack import to_dlpack
|
| 17 |
+
import torch
|
| 18 |
+
import numpy as np
|
| 19 |
+
import kaldifeat
|
| 20 |
+
import _kaldifeat
|
| 21 |
+
from typing import List
|
| 22 |
+
import json
|
| 23 |
+
|
| 24 |
+
class Fbank(torch.nn.Module):
|
| 25 |
+
def __init__(self, opts):
|
| 26 |
+
super(Fbank, self).__init__()
|
| 27 |
+
self.fbank = kaldifeat.Fbank(opts)
|
| 28 |
+
|
| 29 |
+
def forward(self, waves: List[torch.Tensor]):
|
| 30 |
+
return self.fbank(waves)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TritonPythonModel:
|
| 34 |
+
"""Your Python model must use the same class name. Every Python model
|
| 35 |
+
that is created must have "TritonPythonModel" as the class name.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
def initialize(self, args):
|
| 39 |
+
"""`initialize` is called only once when the model is being loaded.
|
| 40 |
+
Implementing `initialize` function is optional. This function allows
|
| 41 |
+
the model to initialize any state associated with this model.
|
| 42 |
+
|
| 43 |
+
Parameters
|
| 44 |
+
----------
|
| 45 |
+
args : dict
|
| 46 |
+
Both keys and values are strings. The dictionary keys and values are:
|
| 47 |
+
* model_config: A JSON string containing the model configuration
|
| 48 |
+
* model_instance_kind: A string containing model instance kind
|
| 49 |
+
* model_instance_device_id: A string containing model instance device ID
|
| 50 |
+
* model_repository: Model repository path
|
| 51 |
+
* model_version: Model version
|
| 52 |
+
* model_name: Model name
|
| 53 |
+
"""
|
| 54 |
+
self.model_config = model_config = json.loads(args['model_config'])
|
| 55 |
+
self.max_batch_size = max(model_config["max_batch_size"], 1)
|
| 56 |
+
|
| 57 |
+
if "GPU" in model_config["instance_group"][0]["kind"]:
|
| 58 |
+
self.device = "cuda"
|
| 59 |
+
else:
|
| 60 |
+
self.device = "cpu"
|
| 61 |
+
|
| 62 |
+
# Get OUTPUT0 configuration
|
| 63 |
+
output0_config = pb_utils.get_output_config_by_name(
|
| 64 |
+
model_config, "speech")
|
| 65 |
+
# Convert Triton types to numpy types
|
| 66 |
+
output0_dtype = pb_utils.triton_string_to_numpy(
|
| 67 |
+
output0_config['data_type'])
|
| 68 |
+
if output0_dtype == np.float32:
|
| 69 |
+
self.output0_dtype = torch.float32
|
| 70 |
+
else:
|
| 71 |
+
self.output0_dtype = torch.float16
|
| 72 |
+
|
| 73 |
+
# Get OUTPUT1 configuration
|
| 74 |
+
output1_config = pb_utils.get_output_config_by_name(
|
| 75 |
+
model_config, "speech_lengths")
|
| 76 |
+
# Convert Triton types to numpy types
|
| 77 |
+
self.output1_dtype = pb_utils.triton_string_to_numpy(
|
| 78 |
+
output1_config['data_type'])
|
| 79 |
+
|
| 80 |
+
params = self.model_config['parameters']
|
| 81 |
+
opts = kaldifeat.FbankOptions()
|
| 82 |
+
opts.frame_opts.dither = 0
|
| 83 |
+
opts.frame_opts.snip_edges = False
|
| 84 |
+
for li in params.items():
|
| 85 |
+
key, value = li
|
| 86 |
+
value = value["string_value"]
|
| 87 |
+
if key == "num_mel_bins":
|
| 88 |
+
opts.mel_opts.num_bins = int(value)
|
| 89 |
+
elif key == "frame_shift_in_ms":
|
| 90 |
+
opts.frame_opts.frame_shift_ms = float(value)
|
| 91 |
+
elif key == "frame_length_in_ms":
|
| 92 |
+
opts.frame_opts.frame_length_ms = float(value)
|
| 93 |
+
elif key == "sample_rate":
|
| 94 |
+
opts.frame_opts.samp_freq = int(value)
|
| 95 |
+
opts.device = torch.device(self.device)
|
| 96 |
+
self.opts = opts
|
| 97 |
+
self.feature_extractor = Fbank(self.opts)
|
| 98 |
+
self.feature_size = opts.mel_opts.num_bins
|
| 99 |
+
|
| 100 |
+
def execute(self, requests):
|
| 101 |
+
"""`execute` must be implemented in every Python model. `execute`
|
| 102 |
+
function receives a list of pb_utils.InferenceRequest as the only
|
| 103 |
+
argument. This function is called when an inference is requested
|
| 104 |
+
for this model.
|
| 105 |
+
|
| 106 |
+
Parameters
|
| 107 |
+
----------
|
| 108 |
+
requests : list
|
| 109 |
+
A list of pb_utils.InferenceRequest
|
| 110 |
+
|
| 111 |
+
Returns
|
| 112 |
+
-------
|
| 113 |
+
list
|
| 114 |
+
A list of pb_utils.InferenceResponse. The length of this list must
|
| 115 |
+
be the same as `requests`
|
| 116 |
+
"""
|
| 117 |
+
batch_count = []
|
| 118 |
+
total_waves = []
|
| 119 |
+
batch_len = []
|
| 120 |
+
responses = []
|
| 121 |
+
for request in requests:
|
| 122 |
+
input0 = pb_utils.get_input_tensor_by_name(request, "wav")
|
| 123 |
+
input1 = pb_utils.get_input_tensor_by_name(request, "wav_lens")
|
| 124 |
+
|
| 125 |
+
cur_b_wav = input0.as_numpy()
|
| 126 |
+
cur_b_wav_lens = input1.as_numpy() # b x 1
|
| 127 |
+
cur_batch = cur_b_wav.shape[0]
|
| 128 |
+
cur_len = cur_b_wav.shape[1]
|
| 129 |
+
batch_count.append(cur_batch)
|
| 130 |
+
batch_len.append(cur_len)
|
| 131 |
+
for wav, wav_len in zip(cur_b_wav, cur_b_wav_lens):
|
| 132 |
+
wav_len = wav_len[0]
|
| 133 |
+
wav = torch.tensor(wav[0:wav_len], dtype=torch.float32,
|
| 134 |
+
device=self.device)
|
| 135 |
+
total_waves.append(wav)
|
| 136 |
+
|
| 137 |
+
features = self.feature_extractor(total_waves)
|
| 138 |
+
for b, l in zip(batch_count, batch_len):
|
| 139 |
+
expect_feat_len = _kaldifeat.num_frames(l, self.opts.frame_opts)
|
| 140 |
+
speech = torch.zeros((b, expect_feat_len, self.feature_size),
|
| 141 |
+
dtype=self.output0_dtype, device=self.device)
|
| 142 |
+
speech_lengths = torch.zeros((b, 1), dtype=torch.int64, device=self.device)
|
| 143 |
+
for i in range(b):
|
| 144 |
+
f = features.pop(0)
|
| 145 |
+
f_l = f.shape[0]
|
| 146 |
+
speech[i, 0: f_l, :] = f.to(self.output0_dtype)
|
| 147 |
+
speech_lengths[i][0] = f_l
|
| 148 |
+
speech = speech.cpu()
|
| 149 |
+
speech_lengths = speech_lengths.cpu()
|
| 150 |
+
out0 = pb_utils.Tensor.from_dlpack("speech", to_dlpack(speech))
|
| 151 |
+
out1 = pb_utils.Tensor.from_dlpack("speech_lengths",
|
| 152 |
+
to_dlpack(speech_lengths))
|
| 153 |
+
inference_response = pb_utils.InferenceResponse(output_tensors=[out0, out1])
|
| 154 |
+
responses.append(inference_response)
|
| 155 |
+
return responses
|
model_repo_offline/feature_extractor/config.pbtxt
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "feature_extractor"
|
| 16 |
+
backend: "python"
|
| 17 |
+
max_batch_size: 512
|
| 18 |
+
|
| 19 |
+
parameters [
|
| 20 |
+
{
|
| 21 |
+
key: "num_mel_bins",
|
| 22 |
+
value: { string_value: "80"}
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
key: "frame_shift_in_ms"
|
| 26 |
+
value: { string_value: "10"}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
key: "frame_length_in_ms"
|
| 30 |
+
value: { string_value: "25"}
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
key: "sample_rate"
|
| 34 |
+
value: { string_value: "16000"}
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
input [
|
| 40 |
+
{
|
| 41 |
+
name: "wav"
|
| 42 |
+
data_type: TYPE_FP32
|
| 43 |
+
dims: [-1]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
name: "wav_lens"
|
| 47 |
+
data_type: TYPE_INT32
|
| 48 |
+
dims: [1]
|
| 49 |
+
}
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
output [
|
| 53 |
+
{
|
| 54 |
+
name: "speech"
|
| 55 |
+
data_type: TYPE_FP32
|
| 56 |
+
dims: [-1, 80]
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
name: "speech_lengths"
|
| 60 |
+
data_type: TYPE_INT64
|
| 61 |
+
dims: [1]
|
| 62 |
+
}
|
| 63 |
+
]
|
| 64 |
+
|
| 65 |
+
dynamic_batching {
|
| 66 |
+
}
|
| 67 |
+
instance_group [
|
| 68 |
+
{
|
| 69 |
+
count: 1
|
| 70 |
+
kind: KIND_GPU
|
| 71 |
+
}
|
| 72 |
+
]
|
model_repo_offline/joiner/1/.gitkeep
ADDED
|
File without changes
|
model_repo_offline/joiner/1/joiner.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:761d2fd80adf49d75cf13e408468e8c735126d8baec527c56c797afffb71250a
|
| 3 |
+
size 1026490
|
model_repo_offline/joiner/config.pbtxt
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "joiner"
|
| 16 |
+
backend: "onnxruntime"
|
| 17 |
+
default_model_filename: "joiner.onnx"
|
| 18 |
+
|
| 19 |
+
max_batch_size: 512
|
| 20 |
+
input [
|
| 21 |
+
{
|
| 22 |
+
name: "projected_encoder_out"
|
| 23 |
+
data_type: TYPE_FP32
|
| 24 |
+
dims: [ 512 ]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
name: "projected_decoder_out"
|
| 28 |
+
data_type: TYPE_FP32
|
| 29 |
+
dims: [ 512 ]
|
| 30 |
+
}
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
output [
|
| 34 |
+
{
|
| 35 |
+
name: "logit"
|
| 36 |
+
data_type: TYPE_FP32
|
| 37 |
+
dims: [ 500 ]
|
| 38 |
+
}
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
dynamic_batching {
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
instance_group [
|
| 45 |
+
{
|
| 46 |
+
count: 1
|
| 47 |
+
kind: KIND_GPU
|
| 48 |
+
}
|
| 49 |
+
]
|
model_repo_offline/joiner_decoder_proj/1/joiner_decoder_proj.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:230fa585dd50f78e28ecf80285c46dabc974f929e3872eb4a4bc73e8ab8e5832
|
| 3 |
+
size 1050893
|
model_repo_offline/joiner_decoder_proj/config.pbtxt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "joiner_decoder_proj"
|
| 16 |
+
backend: "onnxruntime"
|
| 17 |
+
default_model_filename: "joiner_decoder_proj.onnx"
|
| 18 |
+
|
| 19 |
+
max_batch_size: 512
|
| 20 |
+
input [
|
| 21 |
+
{
|
| 22 |
+
name: "decoder_out"
|
| 23 |
+
data_type: TYPE_FP32
|
| 24 |
+
dims: [ 512 ]
|
| 25 |
+
}
|
| 26 |
+
]
|
| 27 |
+
output [
|
| 28 |
+
{
|
| 29 |
+
name: "projected_decoder_out"
|
| 30 |
+
data_type: TYPE_FP32
|
| 31 |
+
dims: [ 512 ]
|
| 32 |
+
}
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
dynamic_batching {
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
instance_group [
|
| 39 |
+
{
|
| 40 |
+
count: 1
|
| 41 |
+
kind: KIND_GPU
|
| 42 |
+
}
|
| 43 |
+
]
|
model_repo_offline/joiner_encoder_proj/1/joiner_encoder_proj.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:754cab31c25f9f2d48ce34291c8564a593cea5cb6114be82b28cbecaa3b3e9e5
|
| 3 |
+
size 788749
|
model_repo_offline/joiner_encoder_proj/config.pbtxt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "joiner_encoder_proj"
|
| 16 |
+
backend: "onnxruntime"
|
| 17 |
+
default_model_filename: "joiner_encoder_proj.onnx"
|
| 18 |
+
|
| 19 |
+
max_batch_size: 512
|
| 20 |
+
input [
|
| 21 |
+
{
|
| 22 |
+
name: "encoder_out"
|
| 23 |
+
data_type: TYPE_FP32
|
| 24 |
+
dims: [ 384 ]
|
| 25 |
+
}
|
| 26 |
+
]
|
| 27 |
+
output [
|
| 28 |
+
{
|
| 29 |
+
name: "projected_encoder_out"
|
| 30 |
+
data_type: TYPE_FP32
|
| 31 |
+
dims: [ 512 ]
|
| 32 |
+
}
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
dynamic_batching {
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
instance_group [
|
| 39 |
+
{
|
| 40 |
+
count: 2
|
| 41 |
+
kind: KIND_GPU
|
| 42 |
+
}
|
| 43 |
+
]
|
model_repo_offline/scorer/1/__pycache__/model.cpython-38.pyc
ADDED
|
Binary file (5.33 kB). View file
|
|
|
model_repo_offline/scorer/1/__pycache__/search.cpython-38.pyc
ADDED
|
Binary file (3.06 kB). View file
|
|
|
model_repo_offline/scorer/1/model.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
|
| 16 |
+
import triton_python_backend_utils as pb_utils
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
import json
|
| 20 |
+
|
| 21 |
+
import torch
|
| 22 |
+
from torch.utils.dlpack import from_dlpack, to_dlpack
|
| 23 |
+
import sentencepiece as spm
|
| 24 |
+
from icefall.lexicon import Lexicon
|
| 25 |
+
|
| 26 |
+
from search import greedy_search
|
| 27 |
+
|
| 28 |
+
class TritonPythonModel:
|
| 29 |
+
"""Your Python model must use the same class name. Every Python model
|
| 30 |
+
that is created must have "TritonPythonModel" as the class name.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def initialize(self, args):
|
| 34 |
+
"""`initialize` is called only once when the model is being loaded.
|
| 35 |
+
Implementing `initialize` function is optional. This function allows
|
| 36 |
+
the model to initialize any state associated with this model.
|
| 37 |
+
|
| 38 |
+
Parameters
|
| 39 |
+
----------
|
| 40 |
+
args : dict
|
| 41 |
+
Both keys and values are strings. The dictionary keys and values are:
|
| 42 |
+
* model_config: A JSON string containing the model configuration
|
| 43 |
+
* model_instance_kind: A string containing model instance kind
|
| 44 |
+
* model_instance_device_id: A string containing model instance device ID
|
| 45 |
+
* model_repository: Model repository path
|
| 46 |
+
* model_version: Model version
|
| 47 |
+
* model_name: Model name
|
| 48 |
+
"""
|
| 49 |
+
self.model_config = model_config = json.loads(args['model_config'])
|
| 50 |
+
self.max_batch_size = max(model_config["max_batch_size"], 1)
|
| 51 |
+
|
| 52 |
+
# Get OUTPUT0 configuration
|
| 53 |
+
output0_config = pb_utils.get_output_config_by_name(
|
| 54 |
+
model_config, "OUTPUT0")
|
| 55 |
+
# Convert Triton types to numpy types
|
| 56 |
+
self.out0_dtype = pb_utils.triton_string_to_numpy(
|
| 57 |
+
output0_config['data_type'])
|
| 58 |
+
|
| 59 |
+
model_instance_kind = args['model_instance_kind']
|
| 60 |
+
model_instance_device_id = args['model_instance_device_id']
|
| 61 |
+
if model_instance_kind == 'GPU':
|
| 62 |
+
self.device = f'cuda:{model_instance_device_id}'
|
| 63 |
+
else:
|
| 64 |
+
self.device= 'cpu'
|
| 65 |
+
|
| 66 |
+
# Get INPUT configuration
|
| 67 |
+
encoder_config = pb_utils.get_input_config_by_name(
|
| 68 |
+
model_config, "encoder_out")
|
| 69 |
+
self.data_type = pb_utils.triton_string_to_numpy(
|
| 70 |
+
encoder_config['data_type'])
|
| 71 |
+
if self.data_type == np.float32:
|
| 72 |
+
self.torch_dtype = torch.float32
|
| 73 |
+
else:
|
| 74 |
+
assert self.data_type == np.float16
|
| 75 |
+
self.torch_dtype = torch.float16
|
| 76 |
+
|
| 77 |
+
self.encoder_dim = encoder_config['dims'][-1]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
self.init_parameters(self.model_config['parameters'])
|
| 81 |
+
|
| 82 |
+
def init_parameters(self, parameters):
|
| 83 |
+
for key,value in parameters.items():
|
| 84 |
+
parameters[key] = value["string_value"]
|
| 85 |
+
self.context_size = int(parameters['context_size'])
|
| 86 |
+
self.decoding_method = parameters['decoding_method']
|
| 87 |
+
if 'bpe' in parameters['tokenizer_file']:
|
| 88 |
+
sp = spm.SentencePieceProcessor()
|
| 89 |
+
sp.load(parameters['tokenizer_file'])
|
| 90 |
+
self.blank_id = sp.piece_to_id("<blk>")
|
| 91 |
+
self.unk_id = sp.piece_to_id("<unk>")
|
| 92 |
+
self.vocab_size = sp.get_piece_size()
|
| 93 |
+
self.tokenizer = sp
|
| 94 |
+
else:
|
| 95 |
+
assert 'char' in parameters['tokenizer_file']
|
| 96 |
+
lexicon = Lexicon(parameters['tokenizer_file'])
|
| 97 |
+
self.unk_id = lexicon.token_table["<unk>"]
|
| 98 |
+
self.blank_id = lexicon.token_table["<blk>"]
|
| 99 |
+
self.vocab_size = max(lexicon.tokens) + 1
|
| 100 |
+
self.tokenizer = lexicon
|
| 101 |
+
|
| 102 |
+
def execute(self, requests):
|
| 103 |
+
"""`execute` must be implemented in every Python model. `execute`
|
| 104 |
+
function receives a list of pb_utils.InferenceRequest as the only
|
| 105 |
+
argument. This function is called when an inference is requested
|
| 106 |
+
for this model.
|
| 107 |
+
|
| 108 |
+
Parameters
|
| 109 |
+
----------
|
| 110 |
+
requests : list
|
| 111 |
+
A list of pb_utils.InferenceRequest
|
| 112 |
+
|
| 113 |
+
Returns
|
| 114 |
+
-------
|
| 115 |
+
list
|
| 116 |
+
A list of pb_utils.InferenceResponse. The length of this list must
|
| 117 |
+
be the same as `requests`
|
| 118 |
+
"""
|
| 119 |
+
# Every Python backend must iterate through list of requests and create
|
| 120 |
+
# an instance of pb_utils.InferenceResponse class for each of them. You
|
| 121 |
+
# should avoid storing any of the input Tensors in the class attributes
|
| 122 |
+
# as they will be overridden in subsequent inference requests. You can
|
| 123 |
+
# make a copy of the underlying NumPy array and store it if it is
|
| 124 |
+
# required.
|
| 125 |
+
|
| 126 |
+
batch_encoder_out_list, batch_encoder_lens_list = [], []
|
| 127 |
+
batchsize_lists = []
|
| 128 |
+
total_seqs = 0
|
| 129 |
+
encoder_max_len = 0
|
| 130 |
+
|
| 131 |
+
for request in requests:
|
| 132 |
+
# Perform inference on the request and append it to responses list...
|
| 133 |
+
in_0 = pb_utils.get_input_tensor_by_name(request, "encoder_out")
|
| 134 |
+
in_1 = pb_utils.get_input_tensor_by_name(request, "encoder_out_lens")
|
| 135 |
+
assert not in_0.is_cpu()
|
| 136 |
+
batch_encoder_out_list.append(from_dlpack(in_0.to_dlpack()))
|
| 137 |
+
encoder_max_len = max(encoder_max_len, batch_encoder_out_list[-1].shape[1])
|
| 138 |
+
cur_b_lens = from_dlpack(in_1.to_dlpack())
|
| 139 |
+
batch_encoder_lens_list.append(cur_b_lens)
|
| 140 |
+
cur_batchsize = cur_b_lens.shape[0]
|
| 141 |
+
batchsize_lists.append(cur_batchsize)
|
| 142 |
+
total_seqs += cur_batchsize
|
| 143 |
+
|
| 144 |
+
encoder_out = torch.zeros((total_seqs, encoder_max_len, self.encoder_dim),
|
| 145 |
+
dtype=self.torch_dtype, device=self.device)
|
| 146 |
+
encoder_out_lens = torch.zeros(total_seqs, dtype=torch.int64)
|
| 147 |
+
st = 0
|
| 148 |
+
|
| 149 |
+
for b in batchsize_lists:
|
| 150 |
+
t = batch_encoder_out_list.pop(0)
|
| 151 |
+
encoder_out[st:st + b, 0:t.shape[1]] = t
|
| 152 |
+
encoder_out_lens[st:st + b] = batch_encoder_lens_list.pop(0)
|
| 153 |
+
st += b
|
| 154 |
+
|
| 155 |
+
if self.decoding_method == 'greedy_search':
|
| 156 |
+
ans = greedy_search(encoder_out, encoder_out_lens, self.context_size, self.unk_id, self.blank_id)
|
| 157 |
+
else:
|
| 158 |
+
raise NotImplementedError
|
| 159 |
+
results = []
|
| 160 |
+
if hasattr(self.tokenizer, 'token_table'):
|
| 161 |
+
for i in range(len(ans)):
|
| 162 |
+
results.append([self.tokenizer.token_table[idx] for idx in ans[i]])
|
| 163 |
+
else:
|
| 164 |
+
for hyp in self.tokenizer.decode(ans):
|
| 165 |
+
results.append(hyp.split())
|
| 166 |
+
st = 0
|
| 167 |
+
responses = []
|
| 168 |
+
for b in batchsize_lists:
|
| 169 |
+
sents = np.array(results[st:st + b])
|
| 170 |
+
out0 = pb_utils.Tensor("OUTPUT0", sents.astype(self.out0_dtype))
|
| 171 |
+
inference_response = pb_utils.InferenceResponse(output_tensors=[out0])
|
| 172 |
+
responses.append(inference_response)
|
| 173 |
+
st += b
|
| 174 |
+
return responses
|
| 175 |
+
|
| 176 |
+
def finalize(self):
|
| 177 |
+
"""`finalize` is called only once when the model is being unloaded.
|
| 178 |
+
Implementing `finalize` function is optional. This function allows
|
| 179 |
+
the model to perform any necessary clean ups before exit.
|
| 180 |
+
"""
|
| 181 |
+
print('Cleaning up...')
|
model_repo_offline/scorer/1/search.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import triton_python_backend_utils as pb_utils
|
| 16 |
+
import numpy as np
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from torch.utils.dlpack import from_dlpack, to_dlpack
|
| 20 |
+
|
| 21 |
+
def _exec_bls(model_name, output_name, inputs):
    """Run one BLS (business-logic-scripting) inference request.

    Sends ``inputs`` to the Triton model ``model_name``, requests the
    single output ``output_name``, and returns that output tensor.

    Raises:
        pb_utils.TritonModelException: if the backend reports an error.
    """
    request = pb_utils.InferenceRequest(
        model_name=model_name,
        requested_output_names=[output_name],
        inputs=inputs)
    response = request.exec()
    if response.has_error():
        raise pb_utils.TritonModelException(response.error().message())
    return pb_utils.get_output_tensor_by_name(response, output_name)


def forward_joiner(cur_encoder_out, decoder_out):
    """Compute joiner logits for one packed-batch step.

    Projects the encoder frames and the decoder states through their
    respective projection models, then feeds both projections to the
    joiner model.

    Args:
        cur_encoder_out: encoder frames for the current packed step
            (presumably shape (batch, encoder_dim) -- TODO confirm
            against the encoder config).
        decoder_out: decoder output with a singleton time axis that is
            squeezed away before projection.

    Returns:
        2-D CPU tensor of logits, one row per sequence in the step.
    """
    in_joiner_tensor_0 = pb_utils.Tensor.from_dlpack(
        "encoder_out", to_dlpack(cur_encoder_out))
    in_joiner_tensor_1 = pb_utils.Tensor.from_dlpack(
        "decoder_out", to_dlpack(decoder_out.squeeze(1)))

    # The three BLS calls share one request/check/extract pattern,
    # factored into _exec_bls to avoid the previous triplication.
    proj_encoder_out = _exec_bls(
        'joiner_encoder_proj', 'projected_encoder_out', [in_joiner_tensor_0])
    proj_decoder_out = _exec_bls(
        'joiner_decoder_proj', 'projected_decoder_out', [in_joiner_tensor_1])
    logit_tensor = _exec_bls(
        'joiner', 'logit', [proj_encoder_out, proj_decoder_out])

    # Use the module-level from_dlpack import (previously spelled
    # torch.utils.dlpack.from_dlpack here, inconsistently with the rest
    # of the file). Moved to CPU because argmax/tolist happen host-side.
    logits = from_dlpack(logit_tensor.to_dlpack()).cpu()
    assert len(logits.shape) == 2, logits.shape
    return logits
|
| 64 |
+
|
| 65 |
+
def forward_decoder(hyps, context_size):
    """Run the decoder model on the last ``context_size`` tokens of each
    hypothesis and return its output tensor (as a torch tensor obtained
    via DLPack).
    """
    # Trim every hypothesis to its trailing context window and pack the
    # result into the int64 matrix the decoder model expects as "y".
    trimmed = np.asarray([h[-context_size:] for h in hyps], dtype=np.int64)
    y_tensor = pb_utils.Tensor("y", trimmed)

    request = pb_utils.InferenceRequest(
        model_name='decoder',
        requested_output_names=['decoder_out'],
        inputs=[y_tensor])
    response = request.exec()

    # Guard clause instead of if/else: surface backend failures immediately.
    if response.has_error():
        raise pb_utils.TritonModelException(response.error().message())

    out_tensor = pb_utils.get_output_tensor_by_name(response, 'decoder_out')
    return from_dlpack(out_tensor.to_dlpack())
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def greedy_search(encoder_out, encoder_out_lens, context_size, unk_id, blank_id):
    """Batched greedy (argmax) decoding for a stateless-transducer model.

    Walks the encoder output frame-by-frame using a packed padded
    sequence so that each time step only processes the sequences that
    are still active, calling the decoder/joiner Triton models (via
    forward_decoder / forward_joiner) at every step.

    Args:
        encoder_out: batch-first padded encoder output,
            shape (batch, max_frames, encoder_dim).
        encoder_out_lens: per-sequence valid frame counts (int64).
        context_size: number of previous tokens the stateless decoder
            conditions on.
        unk_id: token id treated like blank (never emitted).
        blank_id: transducer blank id; also used to left-pad the
            initial decoder context.

    Returns:
        List (in the original batch order) of emitted token-id lists,
        with the initial blank context stripped.
    """

    # Pack so frames are grouped by time step across sequences, sorted
    # by descending length; unsorted_indices lets us restore the
    # caller's order at the end.
    packed_encoder_out = torch.nn.utils.rnn.pack_padded_sequence(
        input=encoder_out,
        lengths=encoder_out_lens.cpu(),
        batch_first=True,
        enforce_sorted=False
    )

    # batch_sizes[t] = number of sequences still active at time step t
    # (monotonically non-increasing).
    pack_batch_size_list = packed_encoder_out.batch_sizes.tolist()

    # Each hypothesis starts as `context_size` blanks; hyps is kept in
    # the packed (sorted-by-length) order.
    hyps = [[blank_id] * context_size for _ in range(encoder_out.shape[0])]
    decoder_out = forward_decoder(hyps, context_size)

    offset = 0
    for batch_size in pack_batch_size_list:
        start = offset
        end = offset + batch_size
        # Frames of all currently-active sequences at this time step.
        current_encoder_out = packed_encoder_out.data[start:end]

        offset = end

        # Shrink decoder state to the still-active prefix; valid because
        # the packed order sorts longer sequences first, so finished
        # sequences drop off the tail.
        decoder_out = decoder_out[:batch_size]

        logits = forward_joiner(current_encoder_out, decoder_out)

        assert logits.ndim == 2, logits.shape
        y = logits.argmax(dim=1).tolist()

        # Append every non-blank/non-unk argmax token; recompute the
        # decoder output only if at least one token was emitted, since
        # otherwise the contexts are unchanged.
        emitted = False
        for i, v in enumerate(y):
            if v not in (blank_id, unk_id):
                hyps[i].append(v)
                emitted = True
        if emitted:
            decoder_out = forward_decoder(hyps[:batch_size], context_size)


    # Drop the initial blank context from each hypothesis.
    sorted_ans = [h[context_size:] for h in hyps]

    # Map results from packed (sorted) order back to the input order.
    ans = []
    unsorted_indices = packed_encoder_out.unsorted_indices.tolist()
    for i in range(encoder_out.shape[0]):
        ans.append(sorted_ans[unsorted_indices[i]])

    return ans
|
model_repo_offline/scorer/config.pbtxt
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "scorer"
|
| 16 |
+
backend: "python"
|
| 17 |
+
max_batch_size: 512
|
| 18 |
+
|
| 19 |
+
parameters [
|
| 20 |
+
{
|
| 21 |
+
key: "context_size",
|
| 22 |
+
value: { string_value: "2"}
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
key: "tokenizer_file",
|
| 26 |
+
value: { string_value: "/workspace/bpe.model"}
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
key: "FORCE_CPU_ONLY_INPUT_TENSORS",
|
| 30 |
+
value: {string_value:"no"}
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
key: "decoding_method",
|
| 34 |
+
value: { string_value: "greedy_search"}
|
| 35 |
+
}
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
input [
|
| 40 |
+
{
|
| 41 |
+
name: "encoder_out"
|
| 42 |
+
data_type: TYPE_FP32
|
| 43 |
+
dims: [-1, 384]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
name: "encoder_out_lens"
|
| 47 |
+
data_type: TYPE_INT64
|
| 48 |
+
dims: [1]
|
| 49 |
+
reshape: { shape: [ ] }
|
| 50 |
+
}
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
output [
|
| 54 |
+
{
|
| 55 |
+
name: "OUTPUT0"
|
| 56 |
+
data_type: TYPE_STRING
|
| 57 |
+
dims: [1]
|
| 58 |
+
}
|
| 59 |
+
]
|
| 60 |
+
|
| 61 |
+
dynamic_batching {
|
| 62 |
+
}
|
| 63 |
+
instance_group [
|
| 64 |
+
{
|
| 65 |
+
count: 1
|
| 66 |
+
kind: KIND_CPU
|
| 67 |
+
}
|
| 68 |
+
]
|
model_repo_offline/transducer/1/.gitkeep
ADDED
|
File without changes
|
model_repo_offline/transducer/config.pbtxt
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "transducer"
|
| 16 |
+
platform: "ensemble"
|
| 17 |
+
max_batch_size: 512
|
| 18 |
+
|
| 19 |
+
input [
|
| 20 |
+
{
|
| 21 |
+
name: "WAV"
|
| 22 |
+
data_type: TYPE_FP32
|
| 23 |
+
dims: [-1]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
name: "WAV_LENS"
|
| 27 |
+
data_type: TYPE_INT32
|
| 28 |
+
dims: [1]
|
| 29 |
+
}
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
output [
|
| 33 |
+
{
|
| 34 |
+
name: "TRANSCRIPTS"
|
| 35 |
+
data_type: TYPE_STRING
|
| 36 |
+
dims: [1]
|
| 37 |
+
}
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
ensemble_scheduling {
|
| 41 |
+
step [
|
| 42 |
+
{
|
| 43 |
+
model_name: "feature_extractor"
|
| 44 |
+
model_version: -1
|
| 45 |
+
input_map {
|
| 46 |
+
key: "wav"
|
| 47 |
+
value: "WAV"
|
| 48 |
+
}
|
| 49 |
+
input_map {
|
| 50 |
+
key: "wav_lens"
|
| 51 |
+
value: "WAV_LENS"
|
| 52 |
+
}
|
| 53 |
+
output_map {
|
| 54 |
+
key: "speech"
|
| 55 |
+
value: "SPEECH"
|
| 56 |
+
}
|
| 57 |
+
output_map {
|
| 58 |
+
key: "speech_lengths"
|
| 59 |
+
value: "SPEECH_LENGTHS"
|
| 60 |
+
}
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
model_name: "encoder"
|
| 64 |
+
model_version: -1
|
| 65 |
+
input_map {
|
| 66 |
+
key: "x"
|
| 67 |
+
value: "SPEECH"
|
| 68 |
+
}
|
| 69 |
+
input_map {
|
| 70 |
+
key: "x_lens"
|
| 71 |
+
value: "SPEECH_LENGTHS"
|
| 72 |
+
}
|
| 73 |
+
output_map {
|
| 74 |
+
key: "encoder_out"
|
| 75 |
+
value: "encoder_out"
|
| 76 |
+
}
|
| 77 |
+
output_map {
|
| 78 |
+
key: "encoder_out_lens"
|
| 79 |
+
value: "encoder_out_lens"
|
| 80 |
+
}
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
model_name: "scorer"
|
| 84 |
+
model_version: -1
|
| 85 |
+
input_map {
|
| 86 |
+
key: "encoder_out"
|
| 87 |
+
value: "encoder_out"
|
| 88 |
+
}
|
| 89 |
+
input_map {
|
| 90 |
+
key: "encoder_out_lens"
|
| 91 |
+
value: "encoder_out_lens"
|
| 92 |
+
}
|
| 93 |
+
output_map {
|
| 94 |
+
key: "OUTPUT0"
|
| 95 |
+
value: "TRANSCRIPTS"
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
]
|
| 99 |
+
}
|