cantabile-kwok
/

vec2wav2.0

Model card Files Files and versions

xet

Community

cantabile-kwok committed on Oct 26, 2024

Commit

fab5f60

verified ·

1 Parent(s): 58f5ee0

Upload config.yml

Browse files

Files changed (1) hide show

config.yml +201 -0

config.yml ADDED Viewed

	@@ -0,0 +1,201 @@

+allow_cache: false
+batch_frames: 3600
+config: conf/ctxv2w.v1.yaml
+crop_max_frames: 100
+discriminator_adv_loss_params:
+  average_by_discriminators: false
+discriminator_grad_norm: -1
+discriminator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+discriminator_optimizer_type: Adam
+discriminator_params:
+  follow_official_norm: true
+  period_discriminator_params:
+    bias: true
+    channels: 32
+    downsample_scales:
+    - 3
+    - 3
+    - 3
+    - 3
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+    use_spectral_norm: false
+    use_weight_norm: true
+  periods:
+  - 2
+  - 3
+  - 5
+  - 7
+  - 11
+  scale_discriminator_params:
+    bias: true
+    channels: 128
+    downsample_scales:
+    - 4
+    - 4
+    - 4
+    - 4
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 15
+    - 41
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    max_groups: 16
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+  scale_downsample_pooling: AvgPool1d
+  scale_downsample_pooling_params:
+    kernel_size: 4
+    padding: 2
+    stride: 2
+  scales: 3
+discriminator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+discriminator_scheduler_type: MultiStepLR
+discriminator_train_start_steps: 0
+discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator
+distributed: true
+dropout_features: 0.0
+eval_interval_steps: 100000
+feat_match_loss_params:
+  average_by_discriminators: false
+  average_by_layers: false
+  include_final_outputs: false
+frontend_mel_prediction_stop_steps: 200000
+frontend_params:
+  conformer_params:
+    activation_type: swish
+    attention_dim: 184
+    attention_dropout_rate: 0.2
+    attention_heads: 2
+    cnn_module_kernel: 31
+    concat_after: false
+    dropout_rate: 0.2
+    linear_units: 1536
+    macaron_style: true
+    normalize_before: true
+    num_blocks: 2
+    pos_enc_layer_type: rel_pos
+    positional_dropout_rate: 0.2
+    positionwise_conv_kernel_size: 3
+    positionwise_layer_type: conv1d
+    selfattention_layer_type: rel_selfattn
+    use_cnn_module: true
+  prompt_channels: 1024
+  vqvec_channels: 512
+generator_adv_loss_params:
+  average_by_discriminators: false
+generator_grad_norm: -1
+generator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+generator_optimizer_type: Adam
+generator_params:
+  bias: true
+  channels: 512
+  condition_dim: 1024
+  in_channels: 184
+  kernel_size: 7
+  nonlinear_activation: snakebeta-condition
+  out_channels: 1
+  resblock: '1'
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  snake_logscale: true
+  upsample_kernel_sizes:
+  - 16
+  - 10
+  - 6
+  - 4
+  upsample_scales:
+  - 8
+  - 5
+  - 3
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+generator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+generator_scheduler_type: MultiStepLR
+generator_train_start_steps: 1
+generator_type: BigVGAN
+hop_size: 240
+lambda_adv: 1.0
+lambda_aux: 45.0
+lambda_feat_match: 2.0
+lambda_frontend_mel_prediction: 60
+log_interval_steps: 1000
+max_num_frames: 3000
+mel_loss_params:
+  fft_size: 2048
+  fmax: 8000
+  fmin: 40
+  fs: 24000
+  hop_size: 300
+  log_base: null
+  num_mels: 80
+  win_length: 1200
+  window: hann
+min_num_frames: 600
+num_mels: 80
+num_save_intermediate_results: 4
+num_workers: 8
+outdir: exp/train_all_ctxv2w.v1
+pin_memory: true
+pretrain: ''
+prompt_fold_by_2: true
+prompt_net_type: ConvPromptPrenet
+rank: 0
+sampling_rate: 24000
+save_interval_steps: 10000
+use_feat_match_loss: true
+use_mel_loss: true
+use_stft_loss: false
+verbose: 1
+version: 0.5.3
+vq_codebook: feats/vqidx/codebook.npy
+win_length: 697
+world_size: 4