from text import symbols


class AttrDict(dict):
    """Dict subclass whose keys are also readable/writable as attributes."""

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        # Point __dict__ at the dict itself so `d.key` and `d["key"]`
        # are the same storage for both reads and writes.
        self.__dict__ = self


def _coerce_override(value, default):
    """Cast the string *value* toward the type of *default*.

    Only scalar defaults (bool, int, float) are converted; everything else
    (strings, lists, None) keeps the raw string, matching the original
    behavior for non-numeric hparams. Falls back to the raw string when
    conversion fails rather than raising.
    """
    if isinstance(default, bool):
        # bool("False") is True, so booleans need explicit handling.
        return value.strip().lower() in ("1", "true", "yes")
    if isinstance(default, (int, float)):
        try:
            return type(default)(value)
        except (TypeError, ValueError):
            return value
    return value


def create_hparams(hparams_string=None, verbose=False):
    """Create model hyperparameters. Parse nondefault from given string.

    Args:
        hparams_string: optional override string; entries are separated by
            "-" and each entry is "key:value" (split on the FIRST ":" so
            values such as URLs survive). The first and last two characters
            are stripped before parsing.
        verbose: unused; kept for interface compatibility with callers.

    Returns:
        AttrDict mapping hyperparameter names to values.
    """
    hparams = AttrDict({
        ################################
        # Experiment Parameters        #
        ################################
        "epochs": 1500,
        "iters_per_checkpoint": 500,
        "seed": 1234,
        "dynamic_loss_scaling": True,
        "fp16_run": False,
        "distributed_run": False,
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:14897",
        "cudnn_enabled": True,
        "cudnn_benchmark": False,
        "ignore_layers": ['embedding.weight'],
        # "freeze_layers": ['encoder'],  # Freeze tacotron2 layer for finetuning

        ################################
        # Data Parameters              #
        ################################
        "load_mel_from_disk": False,
        "load_phone_from_disk": True,
        "training_files": 'filelists/train_files.txt',
        "validation_files": 'filelists/val_files.txt',
        "text_cleaners": ['transliteration_cleaners'],

        ################################
        # Audio Parameters             #
        ################################
        "max_wav_value": 32768.0,
        "sampling_rate": 22050,
        "filter_length": 1024,
        "hop_length": 256,
        "win_length": 1024,
        "n_mel_channels": 80,
        "mel_fmin": 0.0,
        "mel_fmax": 8000.0,

        ################################
        # Model Parameters             #
        ################################
        "n_symbols": len(symbols),
        "symbols_embedding_dim": 512,
        "alignloss": "L2",
        "attention": "StepwiseMonotonicAttention",

        # Encoder parameters
        "encoder_kernel_size": 5,
        "encoder_n_convolutions": 3,
        "encoder_embedding_dim": 512,
        "speaker_embedding_dim": 256,

        # Decoder parameters
        "n_frames_per_step": 1,  # currently only 1 is supported
        "decoder_rnn_dim": 1024,
        "prenet_dim": 256,
        "max_decoder_steps": 1000,
        "gate_threshold": 0.5,
        "p_attention_dropout": 0.1,
        "p_decoder_dropout": 0.1,

        # Attention parameters
        "attention_rnn_dim": 1024,
        "attention_dim": 128,

        # Location Layer parameters
        "attention_location_n_filters": 32,
        "attention_location_kernel_size": 31,

        # Mel-post processing network parameters
        "postnet_embedding_dim": 512,
        "postnet_kernel_size": 5,
        "postnet_n_convolutions": 5,

        ################################
        # Optimization Hyperparameters #
        ################################
        "use_saved_learning_rate": True,
        "learning_rate": 1e-3,
        "weight_decay": 1e-6,
        "grad_clip_thresh": 1.0,
        "batch_size": 8,  # each gpus
        "mask_padding": True  # set model's padded outputs to padded values
    })

    if hparams_string:
        # NOTE(review): [1:-2] drops the final TWO characters — presumably a
        # closing delimiter plus a trailing newline/quote; confirm against
        # the callers that build hparams_string.
        for hp in hparams_string[1:-2].split("-"):
            # maxsplit=1 so values containing ":" (e.g. tcp:// URLs) parse.
            k, v = hp.split(":", 1)
            if k in hparams:
                # Original code stored the raw string, which broke numeric
                # and boolean hparams downstream; cast toward the default's
                # type where that is unambiguous.
                hparams[k] = _coerce_override(v, hparams[k])
                print("Set hparam: " + k + " to " + v)

    return hparams