File size: 930 Bytes
f8b4a6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from .symbols import *


# Default lookup table: each symbol maps to its position in ``symbols``.
_symbol_to_id = {symbol: index for index, symbol in enumerate(symbols)}


def cleaned_text_to_sequence(cleaned_text, tones, language, symbol_to_id=None):
    """Convert cleaned symbols and tones to ID sequences for one language.

    Args:

      cleaned_text: iterable of symbols; each one is looked up in the symbol map
      tones: iterable of tone values; each one is shifted by the tone start
        offset registered for ``language`` in ``language_tone_start_map``
      language: key into ``language_tone_start_map`` and ``language_id_map``
      symbol_to_id: optional symbol -> ID mapping; when omitted (or empty) the
        module-level default table is used

    Returns:

      Tuple ``(phones, tones, lang_ids)``:
        phones: list of IDs, one per symbol in ``cleaned_text``
        tones: list of the input tones, each offset by the language's tone start
        lang_ids: list with the language ID repeated once per phone

    Raises:

      KeyError: if ``language`` is missing from either language map, or if a
        symbol is missing from the symbol map and the map has no "UNK" entry

    """
    # Truthiness (not ``is None``) is intentional: an empty mapping also falls
    # back to the module default, matching the historical behavior.
    symbol_to_id_map = symbol_to_id if symbol_to_id else _symbol_to_id

    unk_id = symbol_to_id_map.get("UNK")
    if unk_id is None:
        # No fallback ID is available, so every symbol must be present.
        try:
            phones = [symbol_to_id_map[symbol] for symbol in cleaned_text]
        except KeyError as err:
            raise KeyError(
                f"Unknown symbol {err.args[0]!r} and the symbol map has no 'UNK' entry"
            ) from err
    else:
        # Unknown symbols are mapped to the "UNK" ID instead of failing.
        phones = [symbol_to_id_map.get(symbol, unk_id) for symbol in cleaned_text]

    try:
        tone_start = language_tone_start_map[language]
        lang_id = language_id_map[language]
    except KeyError as err:
        raise KeyError(f"Unsupported language: {language!r}") from err

    # Use a new name rather than rebinding the ``tones`` parameter.
    shifted_tones = [tone + tone_start for tone in tones]
    lang_ids = [lang_id] * len(phones)
    return phones, shifted_tones, lang_ids