---
# Training configuration dump (keys are sorted alphabetically at every level).
# NOTE(review): this file was recovered from a copy whose lines were wrapped in
# table pipes and had lost all indentation. The nesting below was reconstructed
# from the sorted key order; verify against the config named in `config:`.

allow_cache: false
batch_frames: 10000
config: conf/ctxv2w.v1.yaml
crop_max_frames: 100

# Development-set inputs.
dev_aux_scp: feats/normed_ppe/dev_all/feats.scp
dev_mel_scp: feats/normed_fbank/dev_all/feats.scp
dev_num_frames: data/dev_all/utt2num_frames
dev_prompt_scp: feats/wavlm_l6/dev_all/feats.scp
dev_segments: null
dev_vqidx_scp: feats/vqidx/dev_all/feats.scp
dev_wav_scp: data/dev_all/wav.scp
dev_xvector_scp: null

# Discriminator: loss, optimizer, architecture and LR schedule.
discriminator_adv_loss_params:
  average_by_discriminators: false
discriminator_grad_norm: -1
discriminator_optimizer_params:
  betas:
    - 0.5
    - 0.9
  lr: 0.0002
  weight_decay: 0.0
discriminator_optimizer_type: Adam
discriminator_params:
  follow_official_norm: true
  period_discriminator_params:
    bias: true
    channels: 32
    downsample_scales:
      - 3
      - 3
      - 3
      - 3
      - 1
    in_channels: 1
    kernel_sizes:
      - 5
      - 3
    max_downsample_channels: 1024
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1
    use_spectral_norm: false
    use_weight_norm: true
  periods:
    - 2
    - 3
    - 5
    - 7
    - 11
  scale_discriminator_params:
    bias: true
    channels: 128
    downsample_scales:
      - 4
      - 4
      - 4
      - 4
      - 1
    in_channels: 1
    kernel_sizes:
      - 15
      - 41
      - 5
      - 3
    max_downsample_channels: 1024
    max_groups: 16
    nonlinear_activation: LeakyReLU
    nonlinear_activation_params:
      negative_slope: 0.1
    out_channels: 1
  scale_downsample_pooling: AvgPool1d
  scale_downsample_pooling_params:
    kernel_size: 4
    padding: 2
    stride: 2
  scales: 3
discriminator_scheduler_params:
  gamma: 0.5
  milestones:
    - 200000
    - 400000
    - 600000
    - 800000
discriminator_scheduler_type: MultiStepLR
discriminator_train_start_steps: 0
discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator

distributed: true
dropout_features: 0.0
eval_interval_steps: 100000

feat_match_loss_params:
  average_by_discriminators: false
  average_by_layers: false
  include_final_outputs: false

# Frontend (conformer) settings.
frontend_mel_prediction_stop_steps: 200000
frontend_params:
  conformer_params:
    activation_type: swish
    # Same value as generator_params.in_channels (184).
    # NOTE(review): presumably these must stay in sync; confirm.
    attention_dim: 184
    attention_dropout_rate: 0.2
    attention_heads: 2
    cnn_module_kernel: 31
    concat_after: false
    dropout_rate: 0.2
    linear_units: 1536
    macaron_style: true
    normalize_before: true
    num_blocks: 2
    pos_enc_layer_type: rel_pos
    positional_dropout_rate: 0.2
    positionwise_conv_kernel_size: 3
    positionwise_layer_type: conv1d
    selfattention_layer_type: rel_selfattn
    use_cnn_module: true
  prompt_channels: 1024
  vqvec_channels: 64

# Generator: loss, optimizer, architecture and LR schedule.
generator_adv_loss_params:
  average_by_discriminators: false
generator_grad_norm: -1
generator_optimizer_params:
  betas:
    - 0.5
    - 0.9
  lr: 0.0002
  weight_decay: 0.0
generator_optimizer_type: Adam
generator_params:
  bias: true
  channels: 512
  in_channels: 184
  kernel_size: 7
  nonlinear_activation: LeakyReLU
  nonlinear_activation_params:
    negative_slope: 0.1
  out_channels: 1
  # One dilation list per entry of resblock_kernel_sizes (three of each).
  resblock_dilations:
    - - 1
      - 3
      - 5
    - - 1
      - 3
      - 5
    - - 1
      - 3
      - 5
  resblock_kernel_sizes:
    - 3
    - 7
    - 11
  upsample_kernel_sizes:
    - 16
    - 10
    - 8
    - 6
  # Product is 8 * 5 * 4 * 3 = 480, equal to the top-level hop_size.
  upsample_scales:
    - 8
    - 5
    - 4
    - 3
  use_additional_convs: true
  use_weight_norm: true
generator_scheduler_params:
  gamma: 0.5
  milestones:
    - 200000
    - 400000
    - 600000
    - 800000
generator_scheduler_type: MultiStepLR
generator_train_start_steps: 1
generator_type: HiFiGANGenerator

hop_size: 480

# Loss weights.
lambda_adv: 1.0
lambda_aux: 45.0
lambda_feat_match: 2.0
lambda_frontend_mel_prediction: 60

length_tolerance: 5
log_interval_steps: 1000
max_num_frames: 3000

# NOTE(review): hop_size (300) and win_length (1200) here differ from the
# top-level hop_size (480) and win_length (1394); confirm this is intended.
mel_loss_params:
  fft_size: 2048
  fmax: 8000
  fmin: 40
  fs: 24000
  hop_size: 300
  log_base: null
  num_mels: 80
  win_length: 1200
  window: hann

min_num_frames: 600
num_mels: 80
num_save_intermediate_results: 4
num_workers: 64
outdir: exp/train_all_ctxv2w.v1
pin_memory: true
pretrain: ''
prompt_net_type: ConvPromptPrenet
# NOTE(review): rank / world_size look like values recorded from a specific
# distributed run rather than hand-set options; confirm before reusing.
rank: 0
resume: ''
sampling_rate: 24000
save_interval_steps: 10000

# Training-set inputs.
train_aux_scp: feats/normed_ppe/train_all/feats.scp
train_max_steps: 1000000
train_mel_scp: feats/normed_fbank/train_all/feats.scp
train_num_frames: data/train_all/utt2num_frames
train_prompt_scp: feats/wavlm_l6/train_all/feats.scp
train_segments: null
train_vqidx_scp: feats/vqidx/train_all/feats.scp
train_wav_scp: data/train_all/wav.scp
train_xvector_scp: null

use_feat_match_loss: true
use_mel_loss: true
use_stft_loss: false
verbose: 1
# Quoted so it stays a string under any YAML loader.
version: '0.5.3'
vq_codebook: pretrained/codebook_25hz.npy
win_length: 1394
world_size: 2

# Appeared after the sorted dump, separated by blank lines.
# NOTE(review): presumably an extra top-level option appended by hand; confirm.
repeat_input_tokens: true