| | import os |
| | import sys |
| | import re |
| | from pypinyin import lazy_pinyin, BOPOMOFO |
| | import jieba |
| | import cn2an |
| | import logging |
| |
|
# Keep jieba quiet: only records at WARNING level or above are emitted by its logger.
logging.getLogger('jieba').setLevel(logging.WARNING)
# The dictionary path is resolved relative to the directory of the launched
# script (sys.argv[0]), not relative to this module's own file.
jieba.set_dictionary(os.path.dirname(os.path.realpath(sys.argv[0])) + '/jieba/dict.txt')
# NOTE(review): presumably forces jieba to load the dictionary at import time
# instead of lazily on first segmentation -- confirm against the jieba docs.
jieba.initialize()
| |
|
| | |
| | _latin_to_bopomofo = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [ |
| | ('a', 'ㄟˉ'), |
| | ('b', 'ㄅㄧˋ'), |
| | ('c', 'ㄙㄧˉ'), |
| | ('d', 'ㄉㄧˋ'), |
| | ('e', 'ㄧˋ'), |
| | ('f', 'ㄝˊㄈㄨˋ'), |
| | ('g', 'ㄐㄧˋ'), |
| | ('h', 'ㄝˇㄑㄩˋ'), |
| | ('i', 'ㄞˋ'), |
| | ('j', 'ㄐㄟˋ'), |
| | ('k', 'ㄎㄟˋ'), |
| | ('l', 'ㄝˊㄛˋ'), |
| | ('m', 'ㄝˊㄇㄨˋ'), |
| | ('n', 'ㄣˉ'), |
| | ('o', 'ㄡˉ'), |
| | ('p', 'ㄆㄧˉ'), |
| | ('q', 'ㄎㄧㄡˉ'), |
| | ('r', 'ㄚˋ'), |
| | ('s', 'ㄝˊㄙˋ'), |
| | ('t', 'ㄊㄧˋ'), |
| | ('u', 'ㄧㄡˉ'), |
| | ('v', 'ㄨㄧˉ'), |
| | ('w', 'ㄉㄚˋㄅㄨˋㄌㄧㄡˋ'), |
| | ('x', 'ㄝˉㄎㄨˋㄙˋ'), |
| | ('y', 'ㄨㄞˋ'), |
| | ('z', 'ㄗㄟˋ') |
| | ]] |
| |
|
| | |
| | _bopomofo_to_romaji = [(re.compile('%s' % x[0]), x[1]) for x in [ |
| | ('ㄅㄛ', 'p⁼wo'), |
| | ('ㄆㄛ', 'pʰwo'), |
| | ('ㄇㄛ', 'mwo'), |
| | ('ㄈㄛ', 'fwo'), |
| | ('ㄅ', 'p⁼'), |
| | ('ㄆ', 'pʰ'), |
| | ('ㄇ', 'm'), |
| | ('ㄈ', 'f'), |
| | ('ㄉ', 't⁼'), |
| | ('ㄊ', 'tʰ'), |
| | ('ㄋ', 'n'), |
| | ('ㄌ', 'l'), |
| | ('ㄍ', 'k⁼'), |
| | ('ㄎ', 'kʰ'), |
| | ('ㄏ', 'h'), |
| | ('ㄐ', 'ʧ⁼'), |
| | ('ㄑ', 'ʧʰ'), |
| | ('ㄒ', 'ʃ'), |
| | ('ㄓ', 'ʦ`⁼'), |
| | ('ㄔ', 'ʦ`ʰ'), |
| | ('ㄕ', 's`'), |
| | ('ㄖ', 'ɹ`'), |
| | ('ㄗ', 'ʦ⁼'), |
| | ('ㄘ', 'ʦʰ'), |
| | ('ㄙ', 's'), |
| | ('ㄚ', 'a'), |
| | ('ㄛ', 'o'), |
| | ('ㄜ', 'ə'), |
| | ('ㄝ', 'e'), |
| | ('ㄞ', 'ai'), |
| | ('ㄟ', 'ei'), |
| | ('ㄠ', 'au'), |
| | ('ㄡ', 'ou'), |
| | ('ㄧㄢ', 'yeNN'), |
| | ('ㄢ', 'aNN'), |
| | ('ㄧㄣ', 'iNN'), |
| | ('ㄣ', 'əNN'), |
| | ('ㄤ', 'aNg'), |
| | ('ㄧㄥ', 'iNg'), |
| | ('ㄨㄥ', 'uNg'), |
| | ('ㄩㄥ', 'yuNg'), |
| | ('ㄥ', 'əNg'), |
| | ('ㄦ', 'əɻ'), |
| | ('ㄧ', 'i'), |
| | ('ㄨ', 'u'), |
| | ('ㄩ', 'ɥ'), |
| | ('ˉ', '→'), |
| | ('ˊ', '↑'), |
| | ('ˇ', '↓↑'), |
| | ('ˋ', '↓'), |
| | ('˙', ''), |
| | (',', ','), |
| | ('。', '.'), |
| | ('!', '!'), |
| | ('?', '?'), |
| | ('—', '-') |
| | ]] |
| |
|
| | |
| | _romaji_to_ipa = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [ |
| | ('ʃy', 'ʃ'), |
| | ('ʧʰy', 'ʧʰ'), |
| | ('ʧ⁼y', 'ʧ⁼'), |
| | ('NN', 'n'), |
| | ('Ng', 'ŋ'), |
| | ('y', 'j'), |
| | ('h', 'x') |
| | ]] |
| |
|
| | |
| | _bopomofo_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [ |
| | ('ㄅㄛ', 'p⁼wo'), |
| | ('ㄆㄛ', 'pʰwo'), |
| | ('ㄇㄛ', 'mwo'), |
| | ('ㄈㄛ', 'fwo'), |
| | ('ㄅ', 'p⁼'), |
| | ('ㄆ', 'pʰ'), |
| | ('ㄇ', 'm'), |
| | ('ㄈ', 'f'), |
| | ('ㄉ', 't⁼'), |
| | ('ㄊ', 'tʰ'), |
| | ('ㄋ', 'n'), |
| | ('ㄌ', 'l'), |
| | ('ㄍ', 'k⁼'), |
| | ('ㄎ', 'kʰ'), |
| | ('ㄏ', 'x'), |
| | ('ㄐ', 'tʃ⁼'), |
| | ('ㄑ', 'tʃʰ'), |
| | ('ㄒ', 'ʃ'), |
| | ('ㄓ', 'ts`⁼'), |
| | ('ㄔ', 'ts`ʰ'), |
| | ('ㄕ', 's`'), |
| | ('ㄖ', 'ɹ`'), |
| | ('ㄗ', 'ts⁼'), |
| | ('ㄘ', 'tsʰ'), |
| | ('ㄙ', 's'), |
| | ('ㄚ', 'a'), |
| | ('ㄛ', 'o'), |
| | ('ㄜ', 'ə'), |
| | ('ㄝ', 'ɛ'), |
| | ('ㄞ', 'aɪ'), |
| | ('ㄟ', 'eɪ'), |
| | ('ㄠ', 'ɑʊ'), |
| | ('ㄡ', 'oʊ'), |
| | ('ㄧㄢ', 'jɛn'), |
| | ('ㄩㄢ', 'ɥæn'), |
| | ('ㄢ', 'an'), |
| | ('ㄧㄣ', 'in'), |
| | ('ㄩㄣ', 'ɥn'), |
| | ('ㄣ', 'ən'), |
| | ('ㄤ', 'ɑŋ'), |
| | ('ㄧㄥ', 'iŋ'), |
| | ('ㄨㄥ', 'ʊŋ'), |
| | ('ㄩㄥ', 'jʊŋ'), |
| | ('ㄥ', 'əŋ'), |
| | ('ㄦ', 'əɻ'), |
| | ('ㄧ', 'i'), |
| | ('ㄨ', 'u'), |
| | ('ㄩ', 'ɥ'), |
| | ('ˉ', '→'), |
| | ('ˊ', '↑'), |
| | ('ˇ', '↓↑'), |
| | ('ˋ', '↓'), |
| | ('˙', ''), |
| | (',', ','), |
| | ('。', '.'), |
| | ('!', '!'), |
| | ('?', '?'), |
| | ('—', '-') |
| | ]] |
| |
|
| | |
| | _bopomofo_to_ipa2 = [(re.compile('%s' % x[0]), x[1]) for x in [ |
| | ('ㄅㄛ', 'pwo'), |
| | ('ㄆㄛ', 'pʰwo'), |
| | ('ㄇㄛ', 'mwo'), |
| | ('ㄈㄛ', 'fwo'), |
| | ('ㄅ', 'p'), |
| | ('ㄆ', 'pʰ'), |
| | ('ㄇ', 'm'), |
| | ('ㄈ', 'f'), |
| | ('ㄉ', 't'), |
| | ('ㄊ', 'tʰ'), |
| | ('ㄋ', 'n'), |
| | ('ㄌ', 'l'), |
| | ('ㄍ', 'k'), |
| | ('ㄎ', 'kʰ'), |
| | ('ㄏ', 'h'), |
| | ('ㄐ', 'tɕ'), |
| | ('ㄑ', 'tɕʰ'), |
| | ('ㄒ', 'ɕ'), |
| | ('ㄓ', 'tʂ'), |
| | ('ㄔ', 'tʂʰ'), |
| | ('ㄕ', 'ʂ'), |
| | ('ㄖ', 'ɻ'), |
| | ('ㄗ', 'ts'), |
| | ('ㄘ', 'tsʰ'), |
| | ('ㄙ', 's'), |
| | ('ㄚ', 'a'), |
| | ('ㄛ', 'o'), |
| | ('ㄜ', 'ɤ'), |
| | ('ㄝ', 'ɛ'), |
| | ('ㄞ', 'aɪ'), |
| | ('ㄟ', 'eɪ'), |
| | ('ㄠ', 'ɑʊ'), |
| | ('ㄡ', 'oʊ'), |
| | ('ㄧㄢ', 'jɛn'), |
| | ('ㄩㄢ', 'yæn'), |
| | ('ㄢ', 'an'), |
| | ('ㄧㄣ', 'in'), |
| | ('ㄩㄣ', 'yn'), |
| | ('ㄣ', 'ən'), |
| | ('ㄤ', 'ɑŋ'), |
| | ('ㄧㄥ', 'iŋ'), |
| | ('ㄨㄥ', 'ʊŋ'), |
| | ('ㄩㄥ', 'jʊŋ'), |
| | ('ㄥ', 'ɤŋ'), |
| | ('ㄦ', 'əɻ'), |
| | ('ㄧ', 'i'), |
| | ('ㄨ', 'u'), |
| | ('ㄩ', 'y'), |
| | ('ˉ', '˥'), |
| | ('ˊ', '˧˥'), |
| | ('ˇ', '˨˩˦'), |
| | ('ˋ', '˥˩'), |
| | ('˙', ''), |
| | (',', ','), |
| | ('。', '.'), |
| | ('!', '!'), |
| | ('?', '?'), |
| | ('—', '-') |
| | ]] |
| |
|
| | _symbols_to_chinese = [(re.compile(f'{x[0]}'), x[1]) for x in [ |
| | ('([0-9]+(?:\.?[0-9]+)?)%', r'百分之\1'), |
| | ('([0-9]+)/([0-9]+)', r'\2分之\1'), |
| | ('\+', r'加'), |
| | ('([0-9]+)-([0-9]+)', r'\1减\2'), |
| | ('×', r'乘以'), |
| | ('([0-9]+)x([0-9]+)', r'\1乘以\2'), |
| | ('([0-9]+)\*([0-9]+)', r'\1乘以\2'), |
| | ('÷', r'除以'), |
| | ('=', r'等于'), |
| | ('≠', r'不等于'), |
| | ]] |
| |
|
| |
|
def symbols_to_chinese(text):
    """Rewrite arithmetic symbols in *text* using the ``_symbols_to_chinese`` table.

    The (pattern, replacement) pairs are applied in table order, each one
    operating on the output of the previous substitution.
    """
    converted = text
    for pattern, chinese in _symbols_to_chinese:
        converted = pattern.sub(chinese, converted)
    return converted
| |
|
| |
|
def number_to_chinese(text):
    """Replace each run of digits (optionally with one decimal part) in *text*.

    Every match of ``[0-9]+(?:\\.?[0-9]+)?`` is handed to ``cn2an.an2cn`` and
    substituted, left to right, by whatever that call returns.
    """
    def _convert(match):
        # One numeric literal at a time, in order of appearance.
        return cn2an.an2cn(match.group(0))

    return re.sub(r'[0-9]+(?:\.?[0-9]+)?', _convert, text)
| |
|
| |
|
def number_transform_to_chinese(text):
    """Return ``cn2an.transform(text, "an2cn")``.

    NOTE(review): presumably rewrites the Arabic numerals embedded in a
    sentence as Chinese numerals -- the exact rules live in cn2an.
    """
    return cn2an.transform(text, "an2cn")
| |
|
| |
|
def chinese_to_bopomofo(text):
    """Convert the Chinese words of *text* to tone-marked bopomofo.

    Steps:
      1. '、', ';' and ':' (ASCII or fullwidth) are normalised to ','.
      2. The text is segmented with ``jieba.lcut``.
      3. Tokens without any character in U+4E00..U+9FFF are copied through
         unchanged. Every other token is converted with
         ``lazy_pinyin(token, BOPOMOFO)``; a syllable that ends in a bare
         bopomofo letter (no tone mark) gets 'ˉ' appended.
      4. A converted word is preceded by a single space unless nothing has
         been emitted yet.

    Returns the assembled string.
    """
    # Fullwidth forms are written as escapes so width-normalising tools cannot
    # silently collapse them into their ASCII twins.
    for mark in ('、', ';', ':', '\uff1b', '\uff1a'):
        text = text.replace(mark, ',')

    pieces = []
    for word in jieba.lcut(text, cut_all=False):
        if not re.search('[\u4e00-\u9fff]', word):
            # Punctuation, Latin text, digits...: keep as is, and do not pay
            # for a pinyin lookup that would be thrown away.
            pieces.append(word)
            continue
        syllables = [
            re.sub(r'([\u3105-\u3129])$', r'\1ˉ', syllable)
            for syllable in lazy_pinyin(word, BOPOMOFO)
        ]
        # Same condition as "output so far is not the empty string".
        if any(pieces):
            pieces.append(' ')
        pieces.append(''.join(syllables))
    return ''.join(pieces)
| |
|
| |
|
def latin_to_bopomofo(text):
    """Replace Latin letters in *text* using the ``_latin_to_bopomofo`` table.

    The (pattern, replacement) pairs are applied in table order, each one
    operating on the output of the previous substitution.
    """
    converted = text
    for pattern, bopomofo in _latin_to_bopomofo:
        converted = pattern.sub(bopomofo, converted)
    return converted
| |
|
| |
|
def bopomofo_to_romaji(text):
    """Rewrite bopomofo in *text* using the ``_bopomofo_to_romaji`` table.

    The (pattern, replacement) pairs are applied in table order, each one
    operating on the output of the previous substitution.
    """
    converted = text
    for pattern, romaji in _bopomofo_to_romaji:
        converted = pattern.sub(romaji, converted)
    return converted
| |
|
| |
|
def bopomofo_to_ipa(text):
    """Rewrite bopomofo in *text* using the ``_bopomofo_to_ipa`` table.

    The (pattern, replacement) pairs are applied in table order, each one
    operating on the output of the previous substitution.
    """
    converted = text
    for pattern, ipa in _bopomofo_to_ipa:
        converted = pattern.sub(ipa, converted)
    return converted
| |
|
| |
|
def bopomofo_to_ipa2(text):
    """Rewrite bopomofo in *text* using the ``_bopomofo_to_ipa2`` table.

    The (pattern, replacement) pairs are applied in table order, each one
    operating on the output of the previous substitution.
    """
    converted = text
    for pattern, ipa in _bopomofo_to_ipa2:
        converted = pattern.sub(ipa, converted)
    return converted
| |
|
| |
|
def chinese_to_romaji(text):
    """Run the full Chinese -> romaji pipeline on *text*.

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_romaji, followed by
    the regex clean-ups below.
    """
    for stage in (
        symbols_to_chinese,
        number_transform_to_chinese,
        chinese_to_bopomofo,
        latin_to_bopomofo,
        bopomofo_to_romaji,
    ):
        text = stage(text)

    # 'i' / 'u' directly before a following vowel become the glides 'y' / 'w'.
    text = re.sub('i([aoe])', r'y\1', text)
    text = re.sub('u([aoəe])', r'w\1', text)

    # A backtick-marked consonant followed straight away by tone arrows,
    # spaces or the end of the text gets 'ɹ`' appended.
    text = re.sub('([ʦsɹ]`[⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ`\2', text)
    text = text.replace('ɻ', 'ɹ`')

    # Same idea for ʦ / s without a backtick: append a plain 'ɹ'.
    return re.sub('([ʦs][⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ\2', text)
| |
|
| |
|
def chinese_to_lazy_ipa(text):
    """Convert *text* with chinese_to_romaji(), then apply ``_romaji_to_ipa``.

    The table's (pattern, replacement) pairs are applied in order to the
    romaji string.
    """
    ipa = chinese_to_romaji(text)
    for pattern, replacement in _romaji_to_ipa:
        ipa = pattern.sub(replacement, ipa)
    return ipa
| |
|
| |
|
def chinese_to_ipa(text):
    """Run the full Chinese -> IPA (arrow tones) pipeline on *text*.

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_ipa, followed by the
    regex clean-ups below.
    """
    for stage in (
        symbols_to_chinese,
        number_transform_to_chinese,
        chinese_to_bopomofo,
        latin_to_bopomofo,
        bopomofo_to_ipa,
    ):
        text = stage(text)

    # 'i' / 'u' directly before a following vowel become the glides 'j' / 'w'.
    text = re.sub('i([aoe])', r'j\1', text)
    text = re.sub('u([aoəe])', r'w\1', text)

    # A backtick-marked s / ɹ followed straight away by tone arrows, spaces
    # or the end of the text gets 'ɹ`' appended.
    text = re.sub('([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ`\2', text)
    text = text.replace('ɻ', 'ɹ`')

    # Same idea for s without a backtick: append a plain 'ɹ'.
    return re.sub('([s][⁼ʰ]?)([→↓↑ ]+|$)', r'\1ɹ\2', text)
| |
|
| |
|
def chinese_to_ipa2(text):
    """Run the full Chinese -> IPA (tone-letter) pipeline on *text*.

    Stages, in order: symbols_to_chinese, number_transform_to_chinese,
    chinese_to_bopomofo, latin_to_bopomofo, bopomofo_to_ipa2, followed by the
    four ordered regex clean-ups listed below.
    """
    for stage in (
        symbols_to_chinese,
        number_transform_to_chinese,
        chinese_to_bopomofo,
        latin_to_bopomofo,
        bopomofo_to_ipa2,
    ):
        text = stage(text)

    fixups = (
        # 'i' / 'u' directly before a following vowel become 'j' / 'w'.
        (r'i([aoe])', r'j\1'),
        (r'u([aoəe])', r'w\1'),
        # ʂ / ɹ (optionally aspirated) followed straight away by tone
        # letters, spaces or the end of the text gets 'ʅ' appended ...
        (r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2'),
        # ... and s in the same position gets 'ɿ'.
        (r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2'),
    )
    for pattern, replacement in fixups:
        text = re.sub(pattern, replacement, text)
    return text
| |
|
| |
|
def VITS_PinYin_model():
    """Build and return a ``VITS_PinYin`` front end.

    torch, config and vits_pinyin are imported inside the function, so they
    are only loaded when it is called. The instance is constructed from the
    ``bert`` directory under ``config.ABS_PATH`` and placed on CUDA when
    ``torch.cuda.is_available()`` is true, otherwise on the CPU.
    """
    import torch
    import config
    from vits_pinyin import VITS_PinYin

    if torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    return VITS_PinYin(f"{config.ABS_PATH}/bert", torch.device(device_name))
| |
|