TTSIE / Configuration.py
masterofaudio2077's picture
Upload 13 files
c39b616 verified
Raw
History Blame Contribute Delete
1.63 kB
from Imports import *
# Audio feature-extraction and training constants shared across the project.
SAMPLE_RATE = 16000
FRAME_STEP = 256
FFT_LENGTH = 1024
NUM_MEL_BINS = 80
LOWER_EDGE_HERTZ = 80.0
UPPER_EDGE_HERTZ = 7600.0
R = 3 # reduction factor — predict R mel frames per decoder step
PRE_EMPH = 0.97 # pre-emphasis coefficient
GRAD_CLIP = 5.0 # gradient clip norm
# ── NEW: static shapes for training ───────────────────────────────────────────
MAX_TEXT_LEN = 200 # pad all text sequences to this
MAX_MEL_LEN = 600 # pad all mel sequences to this
# One mel frame per FRAME_STEP samples, so the padded waveform length follows
# directly from the padded mel length.
MAX_WAV_LEN = MAX_MEL_LEN * FRAME_STEP # = 153600 samples (600 * 256)
# Character-level vocabulary: two special tokens followed by the plain symbols
# (lowercase letters, space and a small set of punctuation marks).
PAD_TOKEN = '<PAD>'
EOS_TOKEN = '<EOS>'
VOCAB = [*"abcdefghijklmnopqrstuvwxyz .,!?-'\""]
# Special tokens are placed first, so PAD receives id 0 and EOS receives id 1.
vocab_list = [PAD_TOKEN, EOS_TOKEN, *VOCAB]
# Forward (symbol -> id) map and its inverse (id -> symbol).
char2id = dict(zip(vocab_list, range(len(vocab_list))))
id2char = dict(zip(char2id.values(), char2id.keys()))
VOCAB_SIZE = len(char2id)
PAD_ID, EOS_ID = char2id[PAD_TOKEN], char2id[EOS_TOKEN]
# TensorFlow static hash tables built from the char2id / id2char dictionaries.

# symbol -> id; a symbol that is not in the vocabulary looks up as PAD_ID.
keys_tensor = tf.constant([*char2id])
values_tensor = tf.constant([*char2id.values()], dtype=tf.int32)
char_to_id_table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=keys_tensor,
        values=values_tensor,
    ),
    default_value=PAD_ID,
)

# id -> symbol; an id that is not in the vocabulary looks up as '?'.
keys_tensor2 = tf.constant([*id2char], dtype=tf.int32)
values_tensor2 = tf.constant([*id2char.values()])
id_to_char_table = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=keys_tensor2,
        values=values_tensor2,
    ),
    default_value='?',
)
# Module-level statement: prints the vocabulary size every time this module is
# executed or imported.
print(VOCAB_SIZE)