from Imports import *  # project-wide imports; `tf` (TensorFlow) is expected to come from here

# ── Audio / spectrogram settings ─────────────────────────────────────────────
SAMPLE_RATE = 16000
FRAME_STEP = 256
FFT_LENGTH = 1024
NUM_MEL_BINS = 80
LOWER_EDGE_HERTZ = 80.0
UPPER_EDGE_HERTZ = 7600.0

R = 3            # reduction factor — predict R mel frames per decoder step
PRE_EMPH = 0.97  # pre-emphasis coefficient
GRAD_CLIP = 5.0  # gradient clip norm

# ── NEW: static shapes for training ───────────────────────────────────────────
MAX_TEXT_LEN = 200                      # pad all text sequences to this
MAX_MEL_LEN = 600                       # pad all mel sequences to this
MAX_WAV_LEN = MAX_MEL_LEN * FRAME_STEP  # = 153600 samples

# ── Character vocabulary ──────────────────────────────────────────────────────
VOCAB = list("abcdefghijklmnopqrstuvwxyz .,!?-'\"")

# The two special tokens must be distinct, non-empty strings. If they are equal
# they collapse into a single `char2id` key, which makes PAD_ID == EOS_ID,
# leaves id 0 unassigned, and makes VOCAB_SIZE smaller than the largest id.
PAD_TOKEN = '<pad>'
EOS_TOKEN = '<eos>'

vocab_list = [PAD_TOKEN, EOS_TOKEN] + VOCAB
char2id = {c: i for i, c in enumerate(vocab_list)}
if len(char2id) != len(vocab_list):
    # A duplicate entry would silently break the id <-> char mapping.
    raise ValueError("vocab_list contains duplicate tokens")
id2char = {i: c for c, i in char2id.items()}

VOCAB_SIZE = len(char2id)
PAD_ID = char2id[PAD_TOKEN]
EOS_ID = char2id[EOS_TOKEN]

# ── Lookup tables ─────────────────────────────────────────────────────────────
# String -> int32 id; keys missing from the table map to PAD_ID.
keys_tensor = tf.constant(list(char2id.keys()))
values_tensor = tf.constant(list(char2id.values()), dtype=tf.int32)
char_to_id_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys_tensor, values_tensor),
    default_value=PAD_ID
)

# int32 id -> string; ids missing from the table map to '?'.
keys_tensor2 = tf.constant(list(id2char.keys()), dtype=tf.int32)
values_tensor2 = tf.constant(list(id2char.values()))
id_to_char_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys_tensor2, values_tensor2),
    default_value='?'
)

print(VOCAB_SIZE)