{ "text_encoder": { "input_size": 512, "encoder_layers": 8, "encoder_heads": 2, "encoder_hidden": 384, "encoder_conv_kernel_size": [ 9, 1 ], "encoder_dropout": 0.2, "vocab_size": 366, "num_langs": 2, "num_speakers": 2, "max_position": 3072, "output_size": 512 }, "duration_predictor": { "input_size": 512, "hidden_size": 256, "k_size": 5, "layers": 3, "dropout_rate": 0.2 }, "mel_encoder": { "mel_bins": 100, "hidden_size": 512, "num_layers": 6, "kernel_size": 5, "dropout_rate": 0.2 }, "mel_decoder": { "mel_bins": 100, "hidden_size": 512, "num_layers": 6, "kernel_size": 5, "dropout_rate": 0.2 }, "post_net": { "n_mel_channels": 100, "postnet_embedding_dim": 512, "postnet_kernel_size": 9, "postnet_n_convolutions": 5 }, "wav_encoder": { "mel_bins": 100, "filter_length": 1024, "hop_length": 256, "win_length": 1024, "sampling_rate": 24000, "normalize": false, "power": 1, "norm": null, "center": true }, "wav_decoder": { "vocos_model_id": "charactr/vocos-mel-24khz" }, "delta": 0.2, "look_ahead": 3 }