---
# Model hyperparameter configuration with two top-level sections:
# `plbert_params` and `model_params`.
#
# NOTE(review): the original text had lost its indentation, so the nesting
# below was rebuilt from key order and from the empty-valued section keys
# (`decoder`, `diffusion`, `transformer`, `dist`). In particular, `dist` is
# placed as a sibling of `transformer` under `diffusion` -- confirm against
# the code that loads this file.

plbert_params:
  vocab_size: 178
  hidden_size: 768
  num_attention_heads: 12
  intermediate_size: 2048
  max_position_embeddings: 512
  num_hidden_layers: 12
  dropout: 0.1

model_params:
  multispeaker: true
  # Symbol inventory as one string. It stays double-quoted because it contains
  # an escaped double quote (\") as well as literal single quotes.
  vocab: "$;:,.!?¡¿—…\"«»“” ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

  dim_in: 64
  hidden_dim: 512
  max_conv_dim: 512
  n_layer: 3
  n_mels: 80

  # Same value as plbert_params.vocab_size; presumably both must equal the
  # number of symbols in `vocab` -- TODO confirm.
  n_token: 178
  max_dur: 50
  style_dim: 128

  # Separate setting from plbert_params.dropout above.
  dropout: 0.0

  decoder:
    type: 'hifigan'
    resblock_kernel_sizes: [3, 7, 11]
    # upsample_rates and upsample_kernel_sizes both list four entries.
    upsample_rates: [10, 5, 3, 2]
    upsample_initial_channel: 512
    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    upsample_kernel_sizes: [20, 10, 6, 4]

  diffusion:
    embedding_mask_proba: 0.1

    transformer:
      num_layers: 3
      num_heads: 8
      head_features: 64
      multiplier: 2

    dist:
      sigma_data: 0.19988229232390187
      mean: -4.0
      std: 4.0