Spaces:
Running
on
Zero
Running
on
Zero
| from os import PathLike | |
| from typing import Dict, List, Optional, Union | |
| from wenet.text.char_tokenizer import CharTokenizer | |
| from wenet.text.tokenize_utils import tokenize_by_seg_dict | |
def read_seg_dict(path: Union[str, PathLike]) -> Dict[str, str]:
    """Load a tab-separated segmentation dictionary from a UTF-8 text file.

    Each line is stripped of surrounding whitespace and split on tab
    characters; it must yield exactly two fields. The first field becomes
    the key and the second the value. If a key occurs more than once, the
    last occurrence wins.

    Args:
        path: Location of the dictionary file.

    Returns:
        A dict mapping the first field of every line to its second field.

    Raises:
        ValueError: If a line does not contain exactly two tab-separated
            fields after stripping.
    """
    seg_table: Dict[str, str] = {}
    with open(path, 'r', encoding='utf8') as fin:
        for line_no, line in enumerate(fin, start=1):
            arr = line.strip().split('\t')
            # Explicit check instead of ``assert``: asserts are removed under
            # ``python -O``, which would let malformed lines slip through
            # (silently truncated) or fail later with an opaque IndexError.
            if len(arr) != 2:
                raise ValueError(
                    f'{path}:{line_no}: expected 2 tab-separated fields, '
                    f'got {len(arr)}')
            seg_table[arr[0]] = arr[1]
    return seg_table