Spaces:

masterofaudio2077
/

TTSIE

Sleeping

TTSIE / Configuration.py

Upload 13 files

c39b616 verified 4 months ago

1.63 kB

	from Imports import *

	# Audio feature-extraction and training constants.
	# NOTE(review): units are not stated in this file; the names suggest Hz for
	# SAMPLE_RATE and the *_EDGE_HERTZ bounds, and samples for FRAME_STEP /
	# FFT_LENGTH — confirm against the code that consumes them.
	SAMPLE_RATE = 16000
	FRAME_STEP = 256
	FFT_LENGTH = 1024
	NUM_MEL_BINS = 80
	LOWER_EDGE_HERTZ = 80.0
	UPPER_EDGE_HERTZ = 7600.0
	R = 3 # reduction factor — predict R mel frames per decoder step
	PRE_EMPH = 0.97 # pre-emphasis coefficient
	GRAD_CLIP = 5.0 # gradient clip norm

	# ── NEW: static shapes for training ───────────────────────────────────────────
	MAX_TEXT_LEN = 200 # pad all text sequences to this
	MAX_MEL_LEN = 600 # pad all mel sequences to this
	# Derived from the two constants above: FRAME_STEP samples per mel frame.
	MAX_WAV_LEN = MAX_MEL_LEN * FRAME_STEP # = 153600 samples (600 * 256)

	# Character vocabulary. The two special tokens are placed first, so they
	# receive ids 0 (padding) and 1 (end of sequence); the printable characters
	# follow in the order they appear in the literal below.
	PAD_TOKEN = '<PAD>'
	EOS_TOKEN = '<EOS>'
	VOCAB = [*"abcdefghijklmnopqrstuvwxyz .,!?-'\""]
	vocab_list = [PAD_TOKEN, EOS_TOKEN, *VOCAB]

	# Forward (symbol -> id) and reverse (id -> symbol) mappings.
	char2id = dict(zip(vocab_list, range(len(vocab_list))))
	id2char = {idx: symbol for symbol, idx in char2id.items()}

	VOCAB_SIZE = len(char2id)
	PAD_ID, EOS_ID = char2id[PAD_TOKEN], char2id[EOS_TOKEN]

	# TensorFlow lookup table for char -> id; keys not present in char2id
	# resolve to PAD_ID.
	keys_tensor = tf.constant(list(char2id))
	values_tensor = tf.constant(list(char2id.values()), dtype=tf.int32)
	char_to_id_table = tf.lookup.StaticHashTable(
	    initializer=tf.lookup.KeyValueTensorInitializer(
	        keys=keys_tensor, values=values_tensor
	    ),
	    default_value=PAD_ID,
	)

	# TensorFlow lookup table for id -> char; ids not present in id2char
	# resolve to '?'.
	keys_tensor2 = tf.constant(list(id2char), dtype=tf.int32)
	values_tensor2 = tf.constant(list(id2char.values()))
	id_to_char_table = tf.lookup.StaticHashTable(
	    initializer=tf.lookup.KeyValueTensorInitializer(
	        keys=keys_tensor2, values=values_tensor2
	    ),
	    default_value='?',
	)

	# Printed once when this module is executed/imported.
	print(VOCAB_SIZE)