Commit
·
9234e07
1
Parent(s):
1f93eb0
small fixes
Browse files- .gitignore +1 -0
- add-model-metadata.py +8 -0
- convert-tokens.py +31 -0
- model.onnx +2 -2
- test-paraformer-onnx.py +7 -2
- tokens.txt +0 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
t
|
add-model-metadata.py
CHANGED
|
@@ -40,6 +40,11 @@ def load_lfr_params():
|
|
| 40 |
return lfr_window_size, lfr_window_shift
|
| 41 |
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
def add_meta_data(filename: str, meta_data: Dict[str, str]):
|
| 44 |
"""Add meta data to an ONNX model. It is changed in-place.
|
| 45 |
|
|
@@ -62,6 +67,8 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]):
|
|
| 62 |
def main():
|
| 63 |
lfr_window_size, lfr_window_shift = load_lfr_params()
|
| 64 |
neg_mean, inv_stddev = load_cmvn()
|
|
|
|
|
|
|
| 65 |
meta_data = {
|
| 66 |
"lfr_window_size": str(lfr_window_size),
|
| 67 |
"lfr_window_shift": str(lfr_window_shift),
|
|
@@ -70,6 +77,7 @@ def main():
|
|
| 70 |
"model_type": "paraformer",
|
| 71 |
"version": "1",
|
| 72 |
"model_author": "damo",
|
|
|
|
| 73 |
"comment": "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
|
| 74 |
}
|
| 75 |
add_meta_data("model.onnx", meta_data)
|
|
|
|
| 40 |
return lfr_window_size, lfr_window_shift
|
| 41 |
|
| 42 |
|
| 43 |
+
def get_vocab_size():
    """Return the vocabulary size, i.e., the number of lines in tokens.txt.

    tokens.txt may contain non-ASCII tokens, so open it explicitly as
    UTF-8 (consistent with the other scripts in this commit) instead of
    relying on the platform default encoding. Counting lines lazily also
    avoids loading the whole file into memory.
    """
    with open("tokens.txt", encoding="utf-8") as f:
        return sum(1 for _ in f)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
def add_meta_data(filename: str, meta_data: Dict[str, str]):
|
| 49 |
"""Add meta data to an ONNX model. It is changed in-place.
|
| 50 |
|
|
|
|
| 67 |
def main():
|
| 68 |
lfr_window_size, lfr_window_shift = load_lfr_params()
|
| 69 |
neg_mean, inv_stddev = load_cmvn()
|
| 70 |
+
vocab_size = get_vocab_size()
|
| 71 |
+
|
| 72 |
meta_data = {
|
| 73 |
"lfr_window_size": str(lfr_window_size),
|
| 74 |
"lfr_window_shift": str(lfr_window_shift),
|
|
|
|
| 77 |
"model_type": "paraformer",
|
| 78 |
"version": "1",
|
| 79 |
"model_author": "damo",
|
| 80 |
+
"vocab_size": str(vocab_size),
|
| 81 |
"comment": "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
|
| 82 |
}
|
| 83 |
add_meta_data("model.onnx", meta_data)
|
convert-tokens.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import sys
|
| 3 |
+
from typing import Dict
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load_tokens():
    """Read tokens.txt (one token per line) into an index -> token dict.

    Aborts the script if any line already has two whitespace-separated
    columns, which means the file was converted previously.
    """
    table = {}
    with open("tokens.txt", encoding="utf-8") as f:
        for idx, raw in enumerate(f):
            token = raw.strip()
            if len(token.split()) == 2:
                sys.exit("Already converted!\nExiting")

            table[idx] = token
    return table
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def write_tokens(tokens: Dict[int, str]):
    """Write tokens to new_tokens.txt, one "<token> <index>" per line."""
    lines = [f"{sym} {idx}\n" for idx, sym in tokens.items()]
    with open("new_tokens.txt", "w", encoding="utf-8") as f:
        f.writelines(lines)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def main():
    """Convert one-column tokens.txt into two-column new_tokens.txt."""
    write_tokens(load_tokens())
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
if __name__ == "__main__":
|
| 31 |
+
main()
|
model.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3883fdb54abe1ab077997c8e14cf0532fc2498f8fb5eb118cd53d4ba05e4d169
|
| 3 |
+
size 863821140
|
test-paraformer-onnx.py
CHANGED
|
@@ -61,12 +61,15 @@ def compute_feat():
|
|
| 61 |
return features
|
| 62 |
|
| 63 |
|
|
|
|
|
|
|
|
|
|
| 64 |
def load_tokens():
|
| 65 |
ans = dict()
|
| 66 |
i = 0
|
| 67 |
with open("tokens.txt", encoding="utf-8") as f:
|
| 68 |
for line in f:
|
| 69 |
-
ans[i] = line.strip()
|
| 70 |
i += 1
|
| 71 |
return ans
|
| 72 |
|
|
@@ -76,7 +79,9 @@ def main():
|
|
| 76 |
features = np.expand_dims(features, axis=0)
|
| 77 |
features_length = np.array([features.shape[1]], dtype=np.int32)
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
|
| 81 |
inputs = {
|
| 82 |
"speech": features,
|
|
|
|
| 61 |
return features
|
| 62 |
|
| 63 |
|
| 64 |
+
# tokens.txt from paraformer has a single column, while sherpa-onnx uses
# two columns ("<token> <id>"). Keeping only the first whitespace-separated
# field of each line lets this handle tokens.txt from either project.
def load_tokens():
    """Return an index -> token dict built from tokens.txt."""
    table = dict()
    with open("tokens.txt", encoding="utf-8") as f:
        for idx, raw in enumerate(f):
            table[idx] = raw.strip().split()[0]
    return table
|
| 75 |
|
|
|
|
| 79 |
features = np.expand_dims(features, axis=0)
|
| 80 |
features_length = np.array([features.shape[1]], dtype=np.int32)
|
| 81 |
|
| 82 |
+
session_opts = onnxruntime.SessionOptions()
|
| 83 |
+
session_opts.log_severity_level = 3 # error level
|
| 84 |
+
sess = onnxruntime.InferenceSession("model.onnx", session_opts)
|
| 85 |
|
| 86 |
inputs = {
|
| 87 |
"speech": features,
|
tokens.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|