---
# Configuration for the trainer identified below
# (trainers.vocoder.hfgan.HiFiGANTrainer), with LibriSpeech-960 data paths.
#
# NOTE(review): this file was restored from a copy in which all line breaks
# had been collapsed. The nesting below is reconstructed from key order and
# naming; confirm it against the schema the config loader expects.

trainer:
  identifier: "trainers.vocoder.hfgan.HiFiGANTrainer"
  total_steps: 1600000  # Total Steps * 2 (GANs)
  check_val_every_n_epoch: 2
  save_every_n_epoch: 2
  limit_val_batches: 500
  # Quoted, so this loads as the string "32" rather than the integer 32.
  precision: "32"
  distributed: false

logging:
  log_dir: "outputs/hfgan_50hz_librispeech"
  num_samples: 10

feature:
  sample_rate: 16000
  # NOTE(review): n_fft is one larger than win_length (1025 vs 1024) --
  # confirm this is intentional and not a typo for 1024.
  n_fft: 1025
  win_length: 1024
  # 16000 / 320 = 50, matching the "50hz" in logging.log_dir.
  hop_length: 320
  n_mels: 80
  f_min: 0
  # Half of sample_rate.
  f_max: 8000
  power: 1.0
  log_scale: true

model:
  generator:
    weight_norm: true
    # Product is 5 * 4 * 2 * 2 * 2 * 2 = 320, equal to feature.hop_length.
    upsample_rates: [5, 4, 2, 2, 2, 2]
    # Each kernel size is twice the upsample rate at the same position.
    upsample_kernel_sizes: [10, 8, 4, 4, 4, 4]
    upsample_initial_channel: 512
    resblock_kernel_sizes: [3, 7, 11]
    # One dilation list per entry in resblock_kernel_sizes.
    resblock_dilation_sizes:
      - [1, 3, 5]
      - [1, 3, 5]
      - [1, 3, 5]
    # Same value as feature.n_mels.
    in_channels: 80
    kernel_size: 7
  mrd:
    weight_norm: true
    # NOTE(review): presumably [n_fft, hop_length, win_length] triples for
    # a multi-resolution discriminator -- confirm against the model code.
    resolutions:
      - [1024, 120, 600]
      - [2048, 240, 1200]
      - [512, 50, 240]
  mpd:
    weight_norm: true
    periods: [2, 3, 5, 7, 11]

training:
  # The generator and discriminator optimizer/scheduler settings below are
  # identical; keep them in sync unless a difference is intended.
  generator:
    optimizer:
      identifier: "Adam"
      lr: 0.0001
      beta1: 0.8
      beta2: 0.98
    scheduler:
      identifier: "triangle"
      warmup_steps: 0
      flat_steps: 100000
  discriminator:
    optimizer:
      identifier: "Adam"
      lr: 0.0001
      beta1: 0.8
      beta2: 0.98
    scheduler:
      identifier: "triangle"
      warmup_steps: 0
      flat_steps: 100000
  mel_loss_weight: 40.0

data:
  # NOTE: path and wavdir are absolute, machine-specific locations; adjust
  # them for the environment where training runs.
  train:
    path: "/usr2/liweiche/LibriSpeech-960/train/metadata.txt"
    wavdir: "/usr2/liweiche/LibriSpeech-960/train"
    segment_size: 1.0
    # Same value as feature.sample_rate.
    sample_rate: 16000
    dither: true
    with_text: false
    num_workers: 32
    batch_size: 24
    min_audio_length: 1.5
    bits_per_second: 18500
    sampler:
      type: "standard"
      shuffle: true
  val:
    path: "/usr2/liweiche/LibriSpeech-960/dev/metadata.txt"
    wavdir: "/usr2/liweiche/LibriSpeech-960/dev"
    # Same value as feature.sample_rate.
    sample_rate: 16000
    segment_size: 7.0
    with_text: false
    num_workers: 8
    batch_size: 4
    min_audio_length: 4.0
    bits_per_second: 18500
    sampler:
      type: "standard"
      shuffle: false