# t2 / config.yml
# Uploaded by duongmle (commit message: "Upload config.yml", revision a520400).
---
# Data loading and batching options.
# NOTE(review): the unit of batch_max_steps is not stated here (presumably
# audio samples per training clip) — confirm against the trainer.
allow_cache: true
batch_max_steps: 8192
# Exactly 10x batch_max_steps.
batch_max_steps_valid: 81920
batch_size: 1
# Base configuration file this run refers to.
config: ./ttsexamples/melgan_stft/conf/melgan_stft.v1.yaml
# Validation-set directory (sibling of train_dir under /content/dump_ljspeech).
dev_dir: /content/dump_ljspeech/valid
# Mixed precision is off for the discriminator, while generator_mixed_precision
# in this file is true.
discriminator_mixed_precision: false
# Optimizer settings for the discriminator.
# The child keys must be nested under this mapping: left at top level they
# leave this key null and duplicate the generator's lr_fn / lr_params keys.
discriminator_optimizer_params:
  # Name of the learning-rate schedule (presumably resolved to the Keras
  # PiecewiseConstantDecay schedule — confirm against the trainer).
  lr_fn: PiecewiseConstantDecay
  lr_params:
    # One boundary, two values. Both values are 0.0001, so the rate is the
    # same on either side of the boundary.
    boundaries: [0]
    values: [0.0001, 0.0001]
# Step at which discriminator training starts. Same value (100000) as the
# generator's learning-rate boundary in this file.
discriminator_train_start_steps: 100000
# Interval, in steps, between evaluation runs.
eval_interval_steps: 5000
# NOTE(review): presumably the on-disk format of the dumped features in
# train_dir / dev_dir — confirm.
format: npy
# Mixed precision is on for the generator, while discriminator_mixed_precision
# in this file is false.
generator_mixed_precision: true
# Optimizer settings for the generator.
# The child keys must be nested under this mapping: left at top level they
# leave this key null and duplicate the discriminator's lr_fn / lr_params keys.
generator_optimizer_params:
  # Name of the learning-rate schedule (presumably resolved to the Keras
  # PiecewiseConstantDecay schedule — confirm against the trainer).
  lr_fn: PiecewiseConstantDecay
  lr_params:
    # One boundary, two values: 0.0005 before the boundary and 0.0001 after it
    # under piecewise-constant semantics. The boundary (100000) equals
    # discriminator_train_start_steps in this file.
    boundaries: [100000]
    values: [0.0005, 0.0001]
# 1 here, i.e. presumably no gradient accumulation across batches — confirm.
gradient_accumulation_steps: 1
# Equals the product of melgan_generator_params upsample_scales in this file
# (8 * 8 * 2 * 2 = 256).
hop_size: 256
is_shuffle: true
# Loss weights.
# NOTE(review): presumably the adversarial and feature-matching loss
# multipliers — confirm against the trainer.
lambda_adv: 4.0
lambda_feat_match: 10.0
# Interval, in steps, between log outputs.
log_interval_steps: 200
# Architecture settings for the MelGAN discriminator.
# The child keys must be nested under this mapping: left at top level they
# leave this key null, and filters / is_weight_norm / out_channels collide
# with the generator's keys of the same name.
melgan_discriminator_params:
  # Pooling layer name and its constructor arguments.
  downsample_pooling: AveragePooling1D
  downsample_pooling_params: {pool_size: 4, strides: 2}
  downsample_scales: [4, 4, 4, 4]
  filters: 16
  is_weight_norm: false
  kernel_sizes: [5, 3]
  max_downsample_filters: 1024
  # Activation layer name and its constructor arguments.
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params: {alpha: 0.2}
  out_channels: 1
  # NOTE(review): presumably the number of discriminator scales in a
  # multi-scale setup — confirm.
  scales: 3
# Architecture settings for the MelGAN generator.
# The child keys must be nested under this mapping: left at top level they
# leave this key null, and filters / is_weight_norm / out_channels collide
# with the discriminator's keys of the same name.
melgan_generator_params:
  filters: 512
  is_weight_norm: false
  kernel_size: 7
  out_channels: 1
  stack_kernel_size: 3
  stacks: 3
  # Product is 8 * 8 * 2 * 2 = 256, which equals hop_size in this file.
  upsample_scales: [8, 8, 2, 2]
model_type: melgan_generator
num_save_intermediate_results: 1
# Output directory for this experiment.
outdir: ./ttsexamples/melgan_stft/exp/train.melgan_stft.v1/
# Empty string: no pretrained weights path is given.
pretrained: ''
remove_short_samples: true
# Empty string: no checkpoint path to resume from is given.
resume: ''
# Audio sampling rate; 22050 here.
sampling_rate: 22050
# NOTE(review): same value as log_interval_steps (200). With train_max_steps
# of 4000000 that is 20000 save points if one checkpoint is written per
# interval — confirm this is intended.
save_interval_steps: 200
# Settings for the STFT loss.
# The child keys must be nested under this mapping: left at top level they
# leave this key null.
stft_loss_params:
  # Three parallel lists of three entries each. Presumably index i of each
  # list together defines one STFT resolution — confirm against the loss code.
  fft_lengths: [1024, 2048, 512]
  frame_lengths: [600, 1200, 240]
  frame_steps: [120, 240, 50]
# Training-set directory (sibling of dev_dir under /content/dump_ljspeech).
train_dir: /content/dump_ljspeech/train
# Upper bound on training steps.
train_max_steps: 4000000
# NOTE(review): presumably selects normalized features — confirm.
use_norm: true
verbose: 1
# Quoted so it stays the string '0.0' rather than being parsed as a float.
version: '0.0'