| audio: |
| eps: 1e-10 |
| fft_size: 2048 |
| filter_length: 1200 |
| fmax: 7600 |
| fmin: 80 |
| hop_size: 300 |
| log_base: 10.0 |
| num_mels: 80 |
| sampling_rate: 24000 |
| win_length: 1200 |
| window: hann |
| lang: |
| - de |
| model: |
| decoder: |
| conv_filter_size: 1024 |
| conv_kernel_size: |
| - 9 |
| - 1 |
| dropout: 0.2 |
| kind: styletts |
| n_head: 2 |
| n_layers: 6 |
| scln: true |
| dpe_emb_dim: 32 |
| emb_dim: 512 |
| emb_reduction: 1 |
| encoder: |
| fs2_dropout: 0.2 |
| fs2_head: 2 |
| fs2_layer: 4 |
| ve_n_bins: 256 |
| vp_dropout: 0.5 |
| vp_filter_size: 256 |
| vp_kernel_size: 3 |
| max_mel_len: 1500 |
| max_txt_len: 512 |
| min_mel_len: 100 |
| phones: '''-abcdefghijklmnopqrstuvwxyz' |
| punct_emb_dim: 16 |
| puncts: ' ,.;:-!?"' |
| resnet: |
| encoder_type: ASP |
| layers: |
| - 3 |
| - 4 |
| - 6 |
| - 3 |
| num_filters: |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| stats: |
| energy_max: 532.8609008789062 |
| energy_min: 0.0 |
| pitch_max: 946.5770747073245 |
| pitch_min: 56.861322708306595 |
| training: |
| betas: |
| - 0.0 |
| - 0.99 |
| eps: 1.0e-09 |
| grad_clip: 1.0 |
| weight_decay: 0.0 |
|
|