# Viewer residue recovered from the scraped page, kept here as comments so it
# is no longer parsed as configuration (the 1-65 line-number gutter is dropped):
#   File size: 1,672 Bytes
#   a520400  (presumably the source revision id; confirm)
allow_cache: true
# batch_max_steps_valid is exactly ten times batch_max_steps.
batch_max_steps: 8192
batch_max_steps_valid: 81920
batch_size: 1
config: ./ttsexamples/melgan_stft/conf/melgan_stft.v1.yaml
dev_dir: /content/dump_ljspeech/valid
# The discriminator has mixed precision off while generator_mixed_precision is true.
discriminator_mixed_precision: false
# Optimizer settings for the discriminator. The keys below must be nested:
# flush at column 0 they leave this mapping null and collide with the
# identically named keys of generator_optimizer_params.
discriminator_optimizer_params:
  lr_fn: PiecewiseConstantDecay
  lr_params:
    # One boundary, two values; both values are 0.0001, so the rate is the
    # same on either side of the boundary.
    boundaries: [0]
    values: [0.0001, 0.0001]
# Top-level scalar settings.
# 100000 matches the single boundary in generator_optimizer_params.lr_params.
# NOTE(review): values with a `_steps` suffix are presumably counted in training steps; confirm against the trainer.
discriminator_train_start_steps: 100000
eval_interval_steps: 5000
# NOTE(review): presumably the on-disk feature file format; confirm.
format: npy
# Enabled for the generator only; discriminator_mixed_precision is false.
generator_mixed_precision: true
# Optimizer settings for the generator. The keys below must be nested:
# flush at column 0 they leave this mapping null and collide with the
# identically named keys of discriminator_optimizer_params.
generator_optimizer_params:
  lr_fn: PiecewiseConstantDecay
  lr_params:
    # One boundary, two values: 0.0005 is listed first and 0.0001 second.
    # The boundary equals discriminator_train_start_steps (100000).
    boundaries: [100000]
    values: [0.0005, 0.0001]
# Top-level scalar settings.
gradient_accumulation_steps: 1
# NOTE(review): presumably in audio samples, matching the feature extraction; confirm.
hop_size: 256
is_shuffle: true
# NOTE(review): the names suggest loss-term weights (adversarial / feature matching); confirm against the trainer.
lambda_adv: 4.0
lambda_feat_match: 10.0
# Same value as save_interval_steps (200).
log_interval_steps: 200
# Discriminator hyper-parameters. The keys below must be nested: flush at
# column 0 they leave this mapping null, and filters / is_weight_norm /
# out_channels collide with the same keys of melgan_generator_params.
melgan_discriminator_params:
  downsample_pooling: AveragePooling1D
  # Small leaf mappings are kept in flow style.
  downsample_pooling_params: {pool_size: 4, strides: 2}
  downsample_scales: [4, 4, 4, 4]
  filters: 16
  is_weight_norm: false
  kernel_sizes: [5, 3]
  max_downsample_filters: 1024
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params: {alpha: 0.2}
  out_channels: 1
  scales: 3
# Generator hyper-parameters. The keys below must be nested: flush at
# column 0 they leave this mapping null, and filters / is_weight_norm /
# out_channels collide with the same keys of melgan_discriminator_params.
melgan_generator_params:
  filters: 512
  is_weight_norm: false
  kernel_size: 7
  out_channels: 1
  stack_kernel_size: 3
  stacks: 3
  # The product of the scales is 8 * 8 * 2 * 2 = 256, equal to hop_size.
  upsample_scales: [8, 8, 2, 2]
# Top-level scalar settings.
model_type: melgan_generator
num_save_intermediate_results: 1
outdir: ./ttsexamples/melgan_stft/exp/train.melgan_stft.v1/
# Explicit empty string (not null).
pretrained: ''
remove_short_samples: true
# Explicit empty string (not null).
resume: ''
# NOTE(review): presumably in Hz; confirm.
sampling_rate: 22050
# Same value as log_interval_steps (200).
# NOTE(review): if this sets checkpoint frequency, 200 is small next to train_max_steps (4000000); confirm it is intended.
save_interval_steps: 200
# STFT loss settings. The keys below must be nested: flush at column 0 they
# leave this mapping null and become unrelated top-level keys.
stft_loss_params:
  # Three parallel lists of three entries each.
  # NOTE(review): presumably index i of each list describes one STFT
  # resolution; confirm against the loss code.
  fft_lengths: [1024, 2048, 512]
  frame_lengths: [600, 1200, 240]
  frame_steps: [120, 240, 50]
# Final top-level scalar settings. The lone `|` that followed `version` was
# scrape residue, not a block-scalar header, and has been removed.
train_dir: /content/dump_ljspeech/train
train_max_steps: 4000000
use_norm: true
verbose: 1
# Quoted so the value stays the string '0.0' instead of being read as a float.
version: '0.0'